Files
profile-data/decode.json
2025-03-21 10:22:50 +08:00

1 line
4.4 MiB

{"schemaVersion": 1, "deviceProperties": [{"id": 0, "name": "NVIDIA H800", "totalGlobalMem": 84943110144, "computeMajor": 9, "computeMinor": 0, "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, "regsPerBlock": 65536, "regsPerMultiprocessor": 65536, "warpSize": 32, "sharedMemPerBlock": 49152, "sharedMemPerMultiprocessor": 233472, "numSms": 132, "sharedMemPerBlockOptin": 232448}], "distributedInfo": {"backend": "nccl", "rank": 0, "world_size": 128}, "traceEvents": [{"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672414846, "dur": 19, "args": {"External id": 2655, "Ev Idx": 2654}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672414871, "dur": 107, "args": {"External id": 2656, "Ev Idx": 2655}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672414876, "dur": 102, "args": {"External id": 2657, "Ev Idx": 2656}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672414883, "dur": 25, "args": {"External id": 2658, "Ev Idx": 2657}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672414911, "dur": 66, "args": {"External id": 2659, "Ev Idx": 2658}}, {"ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 494, "tid": 494, "ts": 1742522672414982, "dur": 0, "args": {"External id": 2660, "Ev Idx": 2659}}, {"ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 494, "tid": 494, "ts": 1742522672414986, "dur": 28, "args": {"External id": 2661, "Ev Idx": 2660, "Fwd thread id": 0, "Sequence number": 5459}}, {"ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 494, "tid": 494, "ts": 1742522672415010, "dur": 4, "args": {"External id": 2662, "Ev Idx": 2661}}, {"ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 494, "tid": 494, "ts": 1742522672415055, "dur": 282, "args": {"External id": 2663, "Ev Idx": 2662, "Fwd thread id": 0, "Sequence number": 5459}}, {"ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 494, "tid": 494, "ts": 1742522672415094, "dur": 236, "args": {"External id": 2664, "Ev Idx": 2663}}, {"ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 494, "tid": 494, "ts": 1742522672415132, "dur": 189, "args": {"External id": 2665, "Ev Idx": 2664}}, {"ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 494, "tid": 494, "ts": 1742522672415346, "dur": 4, "args": {"External id": 2666, "Ev Idx": 2665}}, {"ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 494, "tid": 494, "ts": 1742522672415364, "dur": 157, "args": {"External id": 2667, "Ev Idx": 2666, "Fwd thread id": 0, "Sequence number": 5459}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 494, "tid": 494, "ts": 1742522672415366, "dur": 154, "args": {"External id": 2668, "Ev Idx": 2667, "Fwd thread id": 0, "Sequence number": 5459}}, {"ph": "X", "cat": "cpu_op", "name": "aten::scalar_tensor", "pid": 494, "tid": 494, "ts": 1742522672415560, "dur": 69, "args": {"External id": 2669, "Ev Idx": 2668}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672415564, "dur": 12, "args": {"External id": 2670, "Ev Idx": 2669}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672415579, "dur": 50, "args": {"External id": 2671, "Ev Idx": 2670}}, {"ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 494, "tid": 494, "ts": 1742522672415630, "dur": 112, "args": {"External id": 2672, "Ev Idx": 2671, "Fwd thread id": 0, "Sequence number": 5459}}, {"ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 494, "tid": 494, "ts": 1742522672415636, "dur": 104, "args": {"External id": 2673, "Ev Idx": 2672}}, {"ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 494, "tid": 494, "ts": 1742522672415646, "dur": 90, "args": {"External id": 2674, "Ev Idx": 2673}}, {"ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 494, "tid": 494, "ts": 1742522672415744, "dur": 2, "args": {"External id": 2675, "Ev Idx": 2674}}, {"ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 494, "tid": 494, "ts": 1742522672415751, "dur": 84, "args": {"External id": 2676, "Ev Idx": 2675, "Fwd thread id": 0, "Sequence number": 5459}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 494, "tid": 494, "ts": 1742522672415752, "dur": 82, "args": {"External id": 2677, "Ev Idx": 2676, "Fwd thread id": 0, "Sequence number": 5459}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672416113, "dur": 3, "args": {"External id": 2678, "Ev Idx": 2677}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672416117, "dur": 0, "args": {"External id": 2679, "Ev Idx": 2678}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672416118, "dur": 0, "args": {"External id": 2680, "Ev Idx": 2679}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672416118, "dur": 1, "args": {"External id": 2681, "Ev Idx": 2680}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672416119, "dur": 0, "args": {"External id": 2682, "Ev Idx": 2681}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672416120, "dur": 0, "args": {"External id": 2683, "Ev Idx": 2682}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672416120, "dur": 0, "args": {"External id": 2684, "Ev Idx": 2683}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672416121, "dur": 0, "args": {"External id": 2685, "Ev Idx": 2684}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672416121, "dur": 1, "args": {"External id": 2686, "Ev Idx": 2685}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416153, "dur": 40, "args": {"External id": 2687, "Ev Idx": 2686}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416158, "dur": 34, "args": {"External id": 2688, "Ev Idx": 2687}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416159, "dur": 12, "args": {"External id": 2689, "Ev Idx": 2688}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416172, "dur": 20, "args": {"External id": 2690, "Ev Idx": 2689}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416193, "dur": 17, "args": {"External id": 2691, "Ev Idx": 2690}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416194, "dur": 16, "args": {"External id": 2692, "Ev Idx": 2691}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416195, "dur": 3, "args": {"External id": 2693, "Ev Idx": 2692}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416198, "dur": 12, "args": {"External id": 2694, "Ev Idx": 2693}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416211, "dur": 15, "args": {"External id": 2695, "Ev Idx": 2694}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416211, "dur": 15, "args": {"External id": 2696, "Ev Idx": 2695}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416212, "dur": 2, "args": {"External id": 2697, "Ev Idx": 2696}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416215, "dur": 11, "args": {"External id": 2698, "Ev Idx": 2697}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416227, "dur": 15, "args": {"External id": 2699, "Ev Idx": 2698}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416227, "dur": 15, "args": {"External id": 2700, "Ev Idx": 2699}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416228, "dur": 3, "args": {"External id": 2701, "Ev Idx": 2700}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416231, "dur": 10, "args": {"External id": 2702, "Ev Idx": 2701}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672416242, "dur": 1, "args": {"External id": 2703, "Ev Idx": 2702}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416243, "dur": 19, "args": {"External id": 2704, "Ev Idx": 2703}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416244, "dur": 18, "args": {"External id": 2705, "Ev Idx": 2704}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416244, "dur": 3, "args": {"External id": 2706, "Ev Idx": 2705}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416248, "dur": 14, "args": {"External id": 2707, "Ev Idx": 2706}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416263, "dur": 15, "args": {"External id": 2708, "Ev Idx": 2707}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416263, "dur": 15, "args": {"External id": 2709, "Ev Idx": 2708}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416264, "dur": 2, "args": {"External id": 2710, "Ev Idx": 2709}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416267, "dur": 11, "args": {"External id": 2711, "Ev Idx": 2710}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416278, "dur": 5, "args": {"External id": 2712, "Ev Idx": 2711}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416279, "dur": 4, "args": {"External id": 2713, "Ev Idx": 2712}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416279, "dur": 3, "args": {"External id": 2714, "Ev Idx": 2713}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416282, "dur": 0, "args": {"External id": 2715, "Ev Idx": 2714}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416283, "dur": 16, "args": {"External id": 2716, "Ev Idx": 2715}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416284, "dur": 15, "args": {"External id": 2717, "Ev Idx": 2716}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416284, "dur": 3, "args": {"External id": 2718, "Ev Idx": 2717}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416288, "dur": 11, "args": {"External id": 2719, "Ev Idx": 2718}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416418, "dur": 19, "args": {"External id": 2720, "Ev Idx": 2719}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416419, "dur": 18, "args": {"External id": 2721, "Ev Idx": 2720}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416420, "dur": 3, "args": {"External id": 2722, "Ev Idx": 2721}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416424, "dur": 13, "args": {"External id": 2723, "Ev Idx": 2722}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416438, "dur": 15, "args": {"External id": 2724, "Ev Idx": 2723}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416439, "dur": 14, "args": {"External id": 2725, "Ev Idx": 2724}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416439, "dur": 3, "args": {"External id": 2726, "Ev Idx": 2725}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416442, "dur": 10, "args": {"External id": 2727, "Ev Idx": 2726}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416454, "dur": 15, "args": {"External id": 2728, "Ev Idx": 2727}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416454, "dur": 15, "args": {"External id": 2729, "Ev Idx": 2728}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416455, "dur": 2, "args": {"External id": 2730, "Ev Idx": 2729}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416458, "dur": 10, "args": {"External id": 2731, "Ev Idx": 2730}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416470, "dur": 4, "args": {"External id": 2732, "Ev Idx": 2731}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416471, "dur": 3, "args": {"External id": 2733, "Ev Idx": 2732}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416471, "dur": 2, "args": {"External id": 2734, "Ev Idx": 2733}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416474, "dur": 0, "args": {"External id": 2735, "Ev Idx": 2734}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416476, "dur": 14, "args": {"External id": 2736, "Ev Idx": 2735}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416476, "dur": 14, "args": {"External id": 2737, "Ev Idx": 2736}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416476, "dur": 3, "args": {"External id": 2738, "Ev Idx": 2737}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416479, "dur": 11, "args": {"External id": 2739, "Ev Idx": 2738}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416491, "dur": 4, "args": {"External id": 2740, "Ev Idx": 2739}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416492, "dur": 3, "args": {"External id": 2741, "Ev Idx": 2740}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416492, "dur": 2, "args": {"External id": 2742, "Ev Idx": 2741}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416494, "dur": 1, "args": {"External id": 2743, "Ev Idx": 2742}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416496, "dur": 15, "args": {"External id": 2744, "Ev Idx": 2743}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416497, "dur": 14, "args": {"External id": 2745, "Ev Idx": 2744}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416497, "dur": 3, "args": {"External id": 2746, "Ev Idx": 2745}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416500, "dur": 11, "args": {"External id": 2747, "Ev Idx": 2746}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416670, "dur": 10, "args": {"External id": 2748, "Ev Idx": 2747}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416676, "dur": 3, "args": {"External id": 2749, "Ev Idx": 2748}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416685, "dur": 21, "args": {"External id": 2750, "Ev Idx": 2749}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416709, "dur": 1, "args": {"External id": 2751, "Ev Idx": 2750}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416710, "dur": 0, "args": {"External id": 2752, "Ev Idx": 2751}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672416717, "dur": 3, "args": {"External id": 2753, "Ev Idx": 2752}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416721, "dur": 1, "args": {"External id": 2754, "Ev Idx": 2753}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416722, "dur": 0, "args": {"External id": 2755, "Ev Idx": 2754}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416724, "dur": 8, "args": {"External id": 2756, "Ev Idx": 2755}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416734, "dur": 1, "args": {"External id": 2757, "Ev Idx": 2756}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416735, "dur": 0, "args": {"External id": 2758, "Ev Idx": 2757}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672416737, "dur": 1, "args": {"External id": 2759, "Ev Idx": 2758}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416740, "dur": 1, "args": {"External id": 2760, "Ev Idx": 2759}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416741, "dur": 0, "args": {"External id": 2761, "Ev Idx": 2760}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416743, "dur": 6, "args": {"External id": 2762, "Ev Idx": 2761}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416751, "dur": 1, "args": {"External id": 2763, "Ev Idx": 2762}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416752, "dur": 0, "args": {"External id": 2764, "Ev Idx": 2763}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672416754, "dur": 1, "args": {"External id": 2765, "Ev Idx": 2764}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416758, "dur": 1, "args": {"External id": 2766, "Ev Idx": 2765}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416759, "dur": 0, "args": {"External id": 2767, "Ev Idx": 2766}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416761, "dur": 7, "args": {"External id": 2768, "Ev Idx": 2767}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416770, "dur": 1, "args": {"External id": 2769, "Ev Idx": 2768}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416771, "dur": 0, "args": {"External id": 2770, "Ev Idx": 2769}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672416773, "dur": 2, "args": {"External id": 2771, "Ev Idx": 2770}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416778, "dur": 0, "args": {"External id": 2772, "Ev Idx": 2771}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416778, "dur": 0, "args": {"External id": 2773, "Ev Idx": 2772}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416781, "dur": 0, "args": {"External id": 2774, "Ev Idx": 2773}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416781, "dur": 0, "args": {"External id": 2775, "Ev Idx": 2774}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416783, "dur": 6, "args": {"External id": 2776, "Ev Idx": 2775}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416791, "dur": 1, "args": {"External id": 2777, "Ev Idx": 2776}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416791, "dur": 0, "args": {"External id": 2778, "Ev Idx": 2777}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416793, "dur": 1, "args": {"External id": 2779, "Ev Idx": 2778}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416794, "dur": 0, "args": {"External id": 2780, "Ev Idx": 2779}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416795, "dur": 1, "args": {"External id": 2781, "Ev Idx": 2780}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416798, "dur": 0, "args": {"External id": 2782, "Ev Idx": 2781}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416798, "dur": 0, "args": {"External id": 2783, "Ev Idx": 2782}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672416800, "dur": 1, "args": {"External id": 2784, "Ev Idx": 2783}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416806, "dur": 1, "args": {"External id": 2785, "Ev Idx": 2784}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416807, "dur": 0, "args": {"External id": 2786, "Ev Idx": 2785}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672416809, "dur": 32, "args": {"External id": 2787, "Ev Idx": 2786}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416844, "dur": 3, "args": {"External id": 2788, "Ev Idx": 2787}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416845, "dur": 2, "args": {"External id": 2789, "Ev Idx": 2788}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416849, "dur": 0, "args": {"External id": 2790, "Ev Idx": 2789}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416849, "dur": 0, "args": {"External id": 2791, "Ev Idx": 2790}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416851, "dur": 19, "args": {"External id": 2792, "Ev Idx": 2791}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416873, "dur": 0, "args": {"External id": 2793, "Ev Idx": 2792}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416873, "dur": 0, "args": {"External id": 2794, "Ev Idx": 2793}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416874, "dur": 0, "args": {"External id": 2795, "Ev Idx": 2794}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416874, "dur": 0, "args": {"External id": 2796, "Ev Idx": 2795}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672416876, "dur": 1, "args": {"External id": 2797, "Ev Idx": 2796}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416876, "dur": 0, "args": {"External id": 2798, "Ev Idx": 2797}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416878, "dur": 0, "args": {"External id": 2799, "Ev Idx": 2798}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_has_compatible_shallow_copy_type", "pid": 494, "tid": 494, "ts": 1742522672416885, "dur": 1, "args": {"External id": 2800, "Ev Idx": 2799}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416898, "dur": 29, "args": {"External id": 2801, "Ev Idx": 2800}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416899, "dur": 28, "args": {"External id": 2802, "Ev Idx": 2801}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416900, "dur": 4, "args": {"External id": 2803, "Ev Idx": 2802}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416905, "dur": 22, "args": {"External id": 2804, "Ev Idx": 2803}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672416928, "dur": 2, "args": {"External id": 2805, "Ev Idx": 2804}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416934, "dur": 34, "args": {"External id": 2806, "Ev Idx": 2805}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672416936, "dur": 7, "args": {"External id": 2807, "Ev Idx": 2806}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672416937, "dur": 6, "args": {"External id": 2808, "Ev Idx": 2807}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672416938, "dur": 1, "args": {"External id": 2809, "Ev Idx": 2808}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416939, "dur": 4, "args": {"External id": 2810, "Ev Idx": 2809}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 494, "tid": 494, "ts": 1742522672416946, "dur": 8, "args": {"External id": 2811, "Ev Idx": 2810}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 494, "tid": 494, "ts": 1742522672416949, "dur": 4, "args": {"External id": 2812, "Ev Idx": 2811}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672416952, "dur": 1, "args": {"External id": 2813, "Ev Idx": 2812}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672416955, "dur": 12, "args": {"External id": 2814, "Ev Idx": 2813}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672416988, "dur": 30, "args": {"External id": 2815, "Ev Idx": 2814}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672417019, "dur": 6, "args": {"External id": 2816, "Ev Idx": 2815}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672421259, "dur": 3, "args": {"External id": 2817, "Ev Idx": 2816}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672421272, "dur": 12, "args": {"External id": 2818, "Ev Idx": 2817}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672421274, "dur": 9, "args": {"External id": 2819, "Ev Idx": 2818}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672421426, "dur": 0, "args": {"External id": 2820, "Ev Idx": 2819}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672421466, "dur": 1, "args": {"External id": 2821, "Ev Idx": 2820}}, {"ph": "X", "cat": "cpu_op", "name": "aten::index_select", "pid": 494, "tid": 494, "ts": 1742522672421471, "dur": 51, "args": {"External id": 2822, "Ev Idx": 2821}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672421474, "dur": 4, "args": {"External id": 2823, "Ev Idx": 2822}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672421486, "dur": 6, "args": {"External id": 2824, "Ev Idx": 2823}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672421532, "dur": 4, "args": {"External id": 2825, "Ev Idx": 2824}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672421656, "dur": 4, "args": {"External id": 2826, "Ev Idx": 2825}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672421658, "dur": 1, "args": {"External id": 2827, "Ev Idx": 2826}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672421823, "dur": 45, "args": {"External id": 2828, "Ev Idx": 2827}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672421878, "dur": 0, "args": {"External id": 2829, "Ev Idx": 2828}}, {"ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 494, "tid": 494, "ts": 1742522672421879, "dur": 0, "args": {"External id": 2830, "Ev Idx": 2829}}, {"ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 494, "tid": 494, "ts": 1742522672421880, "dur": 1, "args": {"External id": 2831, "Ev Idx": 2830}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672421884, "dur": 1, "args": {"External id": 2832, "Ev Idx": 2831}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672421892, "dur": 0, "args": {"External id": 2833, "Ev Idx": 2832}}, {"ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 494, "tid": 494, "ts": 1742522672421893, "dur": 0, "args": {"External id": 2834, "Ev Idx": 2833}}, {"ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 494, "tid": 494, "ts": 1742522672421893, "dur": 0, "args": {"External id": 2835, "Ev Idx": 2834}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672421895, "dur": 1, "args": {"External id": 2836, "Ev Idx": 2835}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672421903, "dur": 0, "args": {"External id": 2837, "Ev Idx": 2836}}, {"ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 494, "tid": 494, "ts": 1742522672421903, "dur": 0, "args": {"External id": 2838, "Ev Idx": 2837}}, {"ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 494, "tid": 494, "ts": 1742522672421904, "dur": 0, "args": {"External id": 2839, "Ev Idx": 2838}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672421905, "dur": 1, "args": {"External id": 2840, "Ev Idx": 2839}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672421913, "dur": 0, "args": {"External id": 2841, "Ev Idx": 2840}}, {"ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 494, "tid": 494, "ts": 1742522672421913, "dur": 0, "args": {"External id": 2842, "Ev Idx": 2841}}, {"ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 494, "tid": 494, "ts": 1742522672421914, "dur": 0, "args": {"External id": 2843, "Ev Idx": 2842}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672421915, "dur": 1, "args": {"External id": 2844, "Ev Idx": 2843}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672421923, "dur": 0, "args": {"External id": 2845, "Ev Idx": 2844}}, {"ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 494, "tid": 494, "ts": 1742522672421923, "dur": 1, "args": {"External id": 2846, "Ev Idx": 2845}}, {"ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 494, "tid": 494, "ts": 1742522672421924, "dur": 0, "args": {"External id": 2847, "Ev Idx": 2846}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672421926, "dur": 0, "args": {"External id": 2848, "Ev Idx": 2847}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672421934, "dur": 0, "args": {"External id": 2849, "Ev Idx": 2848}}, {"ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 494, "tid": 494, "ts": 1742522672421934, "dur": 0, "args": {"External id": 2850, "Ev Idx": 2849}}, {"ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 494, "tid": 494, "ts": 1742522672421934, "dur": 0, "args": {"External id": 2851, "Ev Idx": 2850}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672421938, "dur": 31, "args": {"External id": 2852, "Ev Idx": 2851}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672421939, "dur": 30, "args": {"External id": 2853, "Ev Idx": 2852}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672421941, "dur": 6, "args": {"External id": 2854, "Ev Idx": 2853}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672421947, "dur": 21, "args": {"External id": 2855, "Ev Idx": 2854}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672421971, "dur": 11, "args": {"External id": 2856, "Ev Idx": 2855}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672421971, "dur": 11, "args": {"External id": 2857, "Ev Idx": 2856}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672421972, "dur": 3, "args": {"External id": 2858, "Ev Idx": 2857}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672421975, "dur": 6, "args": {"External id": 2859, "Ev Idx": 2858}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672421983, "dur": 10, "args": {"External id": 2860, "Ev Idx": 2859}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672421983, "dur": 10, "args": {"External id": 2861, "Ev Idx": 2860}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672421984, "dur": 3, "args": {"External id": 2862, "Ev Idx": 2861}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672421987, "dur": 6, "args": {"External id": 2863, "Ev Idx": 2862}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672421994, "dur": 11, "args": {"External id": 2864, "Ev Idx": 2863}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672421995, "dur": 9, "args": {"External id": 2865, "Ev Idx": 2864}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672421995, "dur": 3, "args": {"External id": 2866, "Ev Idx": 2865}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672421999, "dur": 5, "args": {"External id": 2867, "Ev Idx": 2866}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672422006, "dur": 9, "args": {"External id": 2868, "Ev Idx": 2867}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672422006, "dur": 9, "args": {"External id": 2869, "Ev Idx": 2868}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672422006, "dur": 3, "args": {"External id": 2870, "Ev Idx": 2869}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672422009, "dur": 6, "args": {"External id": 2871, "Ev Idx": 2870}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672422017, "dur": 9, "args": {"External id": 2872, "Ev Idx": 2871}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672422017, "dur": 9, "args": {"External id": 2873, "Ev Idx": 2872}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672422017, "dur": 3, "args": {"External id": 2874, "Ev Idx": 2873}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672422021, "dur": 5, "args": {"External id": 2875, "Ev Idx": 2874}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672422066, "dur": 2, "args": {"External id": 2876, "Ev Idx": 2875}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422067, "dur": 1, "args": {"External id": 2877, "Ev Idx": 2876}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672422070, "dur": 1, "args": {"External id": 2878, "Ev Idx": 2877}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422071, "dur": 0, "args": {"External id": 2879, "Ev Idx": 2878}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unbind", "pid": 494, "tid": 494, "ts": 1742522672422080, "dur": 106, "args": {"External id": 2880, "Ev Idx": 2879}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422083, "dur": 8, "args": {"External id": 2881, "Ev Idx": 2880}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422090, "dur": 0, "args": {"External id": 2882, "Ev Idx": 2881}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422091, "dur": 1, "args": {"External id": 2883, "Ev Idx": 2882}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422092, "dur": 0, "args": {"External id": 2884, "Ev Idx": 2883}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422092, "dur": 1, "args": {"External id": 2885, "Ev Idx": 2884}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422093, "dur": 0, "args": {"External id": 2886, "Ev Idx": 2885}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422093, "dur": 0, "args": {"External id": 2887, "Ev Idx": 2886}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422093, "dur": 0, "args": {"External id": 2888, "Ev Idx": 2887}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422094, "dur": 0, "args": {"External id": 2889, "Ev Idx": 2888}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422094, "dur": 0, "args": {"External id": 2890, "Ev Idx": 2889}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422094, "dur": 1, "args": {"External id": 2891, "Ev Idx": 2890}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422095, "dur": 0, "args": {"External id": 2892, "Ev Idx": 2891}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422095, "dur": 1, "args": {"External id": 2893, "Ev Idx": 2892}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422095, "dur": 1, "args": {"External id": 2894, "Ev Idx": 2893}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422096, "dur": 0, "args": {"External id": 2895, "Ev Idx": 2894}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422096, "dur": 0, "args": {"External id": 2896, "Ev Idx": 2895}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422097, "dur": 0, "args": {"External id": 2897, "Ev Idx": 2896}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422097, "dur": 0, "args": {"External id": 2898, "Ev Idx": 2897}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422097, "dur": 1, "args": {"External id": 2899, "Ev Idx": 2898}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422098, "dur": 0, "args": {"External id": 2900, "Ev Idx": 2899}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422098, "dur": 1, "args": {"External id": 2901, "Ev Idx": 2900}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422098, "dur": 0, "args": {"External id": 2902, "Ev Idx": 2901}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422099, "dur": 0, "args": {"External id": 2903, "Ev Idx": 2902}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422099, "dur": 0, "args": {"External id": 2904, "Ev Idx": 2903}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422100, "dur": 0, "args": {"External id": 2905, "Ev Idx": 2904}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422100, "dur": 0, "args": {"External id": 2906, "Ev Idx": 2905}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422100, "dur": 1, "args": {"External id": 2907, "Ev Idx": 2906}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422100, "dur": 1, "args": {"External id": 2908, "Ev Idx": 2907}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422101, "dur": 0, "args": {"External id": 2909, "Ev Idx": 2908}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422101, "dur": 0, "args": {"External id": 2910, "Ev Idx": 2909}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422102, "dur": 0, "args": {"External id": 2911, "Ev Idx": 2910}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422102, "dur": 0, "args": {"External id": 2912, "Ev Idx": 2911}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422102, "dur": 1, "args": {"External id": 2913, "Ev Idx": 2912}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422103, "dur": 0, "args": {"External id": 2914, "Ev Idx": 2913}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422103, "dur": 1, "args": {"External id": 2915, "Ev Idx": 2914}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422103, "dur": 1, "args": {"External id": 2916, "Ev Idx": 2915}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422104, "dur": 0, "args": {"External id": 2917, "Ev Idx": 2916}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422104, "dur": 0, "args": {"External id": 2918, "Ev Idx": 2917}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422105, "dur": 0, "args": {"External id": 2919, "Ev Idx": 2918}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422105, "dur": 0, "args": {"External id": 2920, "Ev Idx": 2919}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422105, "dur": 1, "args": {"External id": 2921, "Ev Idx": 2920}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422106, "dur": 0, "args": {"External id": 2922, "Ev Idx": 2921}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422106, "dur": 1, "args": {"External id": 2923, "Ev Idx": 2922}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422106, "dur": 0, "args": {"External id": 2924, "Ev Idx": 2923}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422107, "dur": 0, "args": {"External id": 2925, "Ev Idx": 2924}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422107, "dur": 0, "args": {"External id": 2926, "Ev Idx": 2925}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422108, "dur": 0, "args": {"External id": 2927, "Ev Idx": 2926}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422108, "dur": 0, "args": {"External id": 2928, "Ev Idx": 2927}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422108, "dur": 1, "args": {"External id": 2929, "Ev Idx": 2928}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422108, "dur": 1, "args": {"External id": 2930, "Ev Idx": 2929}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422109, "dur": 0, "args": {"External id": 2931, "Ev Idx": 2930}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422109, "dur": 0, "args": {"External id": 2932, "Ev Idx": 2931}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422110, "dur": 0, "args": {"External id": 2933, "Ev Idx": 2932}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422110, "dur": 0, "args": {"External id": 2934, "Ev Idx": 2933}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422110, "dur": 1, "args": {"External id": 2935, "Ev Idx": 2934}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422111, "dur": 0, "args": {"External id": 2936, "Ev Idx": 2935}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422111, "dur": 1, "args": {"External id": 2937, "Ev Idx": 2936}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422111, "dur": 0, "args": {"External id": 2938, "Ev Idx": 2937}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422112, "dur": 0, "args": {"External id": 2939, "Ev Idx": 2938}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422112, "dur": 0, "args": {"External id": 2940, "Ev Idx": 2939}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422112, "dur": 1, "args": {"External id": 2941, "Ev Idx": 2940}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422113, "dur": 0, "args": {"External id": 2942, "Ev Idx": 2941}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422113, "dur": 1, "args": {"External id": 2943, "Ev Idx": 2942}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422113, "dur": 1, "args": {"External id": 2944, "Ev Idx": 2943}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422114, "dur": 1, "args": {"External id": 2945, "Ev Idx": 2944}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422114, "dur": 0, "args": {"External id": 2946, "Ev Idx": 2945}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422115, "dur": 0, "args": {"External id": 2947, "Ev Idx": 2946}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422115, "dur": 0, "args": {"External id": 2948, "Ev Idx": 2947}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422115, "dur": 1, "args": {"External id": 2949, "Ev Idx": 2948}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422116, "dur": 0, "args": {"External id": 2950, "Ev Idx": 2949}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422116, "dur": 1, "args": {"External id": 2951, "Ev Idx": 2950}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422116, "dur": 0, "args": {"External id": 2952, "Ev Idx": 2951}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422117, "dur": 0, "args": {"External id": 2953, "Ev Idx": 2952}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422117, "dur": 0, "args": {"External id": 2954, "Ev Idx": 2953}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422118, "dur": 0, "args": {"External id": 2955, "Ev Idx": 2954}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422118, "dur": 0, "args": {"External id": 2956, "Ev Idx": 2955}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422118, "dur": 1, "args": {"External id": 2957, "Ev Idx": 2956}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422118, "dur": 1, "args": {"External id": 2958, "Ev Idx": 2957}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422119, "dur": 0, "args": {"External id": 2959, "Ev Idx": 2958}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422119, "dur": 0, "args": {"External id": 2960, "Ev Idx": 2959}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422120, "dur": 0, "args": {"External id": 2961, "Ev Idx": 2960}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422120, "dur": 0, "args": {"External id": 2962, "Ev Idx": 2961}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422120, "dur": 1, "args": {"External id": 2963, "Ev Idx": 2962}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422121, "dur": 0, "args": {"External id": 2964, "Ev Idx": 2963}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422121, "dur": 0, "args": {"External id": 2965, "Ev Idx": 2964}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422121, "dur": 0, "args": {"External id": 2966, "Ev Idx": 2965}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422122, "dur": 0, "args": {"External id": 2967, "Ev Idx": 2966}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422122, "dur": 0, "args": {"External id": 2968, "Ev Idx": 2967}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422123, "dur": 0, "args": {"External id": 2969, "Ev Idx": 2968}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422123, "dur": 0, "args": {"External id": 2970, "Ev Idx": 2969}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422123, "dur": 1, "args": {"External id": 2971, "Ev Idx": 2970}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422124, "dur": 0, "args": {"External id": 2972, "Ev Idx": 2971}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422124, "dur": 0, "args": {"External id": 2973, "Ev Idx": 2972}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422124, "dur": 0, "args": {"External id": 2974, "Ev Idx": 2973}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422125, "dur": 0, "args": {"External id": 2975, "Ev Idx": 2974}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422125, "dur": 0, "args": {"External id": 2976, "Ev Idx": 2975}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422125, "dur": 1, "args": {"External id": 2977, "Ev Idx": 2976}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422126, "dur": 0, "args": {"External id": 2978, "Ev Idx": 2977}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422126, "dur": 1, "args": {"External id": 2979, "Ev Idx": 2978}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422126, "dur": 0, "args": {"External id": 2980, "Ev Idx": 2979}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422127, "dur": 0, "args": {"External id": 2981, "Ev Idx": 2980}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422127, "dur": 0, "args": {"External id": 2982, "Ev Idx": 2981}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422127, "dur": 1, "args": {"External id": 2983, "Ev Idx": 2982}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422128, "dur": 0, "args": {"External id": 2984, "Ev Idx": 2983}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422128, "dur": 1, "args": {"External id": 2985, "Ev Idx": 2984}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422128, "dur": 0, "args": {"External id": 2986, "Ev Idx": 2985}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422129, "dur": 0, "args": {"External id": 2987, "Ev Idx": 2986}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422129, "dur": 0, "args": {"External id": 2988, "Ev Idx": 2987}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422129, "dur": 1, "args": {"External id": 2989, "Ev Idx": 2988}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422130, "dur": 0, "args": {"External id": 2990, "Ev Idx": 2989}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422130, "dur": 1, "args": {"External id": 2991, "Ev Idx": 2990}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422130, "dur": 1, "args": {"External id": 2992, "Ev Idx": 2991}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422131, "dur": 1, "args": {"External id": 2993, "Ev Idx": 2992}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422131, "dur": 0, "args": {"External id": 2994, "Ev Idx": 2993}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422132, "dur": 0, "args": {"External id": 2995, "Ev Idx": 2994}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422132, "dur": 0, "args": {"External id": 2996, "Ev Idx": 2995}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422133, "dur": 0, "args": {"External id": 2997, "Ev Idx": 2996}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422133, "dur": 0, "args": {"External id": 2998, "Ev Idx": 2997}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422133, "dur": 1, "args": {"External id": 2999, "Ev Idx": 2998}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422133, "dur": 1, "args": {"External id": 3000, "Ev Idx": 2999}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422134, "dur": 0, "args": {"External id": 3001, "Ev Idx": 3000}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422134, "dur": 0, "args": {"External id": 3002, "Ev Idx": 3001}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422135, "dur": 0, "args": {"External id": 3003, "Ev Idx": 3002}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422135, "dur": 0, "args": {"External id": 3004, "Ev Idx": 3003}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422135, "dur": 1, "args": {"External id": 3005, "Ev Idx": 3004}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422136, "dur": 0, "args": {"External id": 3006, "Ev Idx": 3005}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422136, "dur": 0, "args": {"External id": 3007, "Ev Idx": 3006}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422136, "dur": 0, "args": {"External id": 3008, "Ev Idx": 3007}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422137, "dur": 0, "args": {"External id": 3009, "Ev Idx": 3008}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422137, "dur": 0, "args": {"External id": 3010, "Ev Idx": 3009}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422137, "dur": 1, "args": {"External id": 3011, "Ev Idx": 3010}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422138, "dur": 0, "args": {"External id": 3012, "Ev Idx": 3011}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422138, "dur": 1, "args": {"External id": 3013, "Ev Idx": 3012}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422138, "dur": 0, "args": {"External id": 3014, "Ev Idx": 3013}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422139, "dur": 0, "args": {"External id": 3015, "Ev Idx": 3014}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422139, "dur": 0, "args": {"External id": 3016, "Ev Idx": 3015}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422140, "dur": 0, "args": {"External id": 3017, "Ev Idx": 3016}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422140, "dur": 0, "args": {"External id": 3018, "Ev Idx": 3017}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422140, "dur": 1, "args": {"External id": 3019, "Ev Idx": 3018}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422141, "dur": 0, "args": {"External id": 3020, "Ev Idx": 3019}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422141, "dur": 0, "args": {"External id": 3021, "Ev Idx": 3020}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422141, "dur": 0, "args": {"External id": 3022, "Ev Idx": 3021}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422142, "dur": 0, "args": {"External id": 3023, "Ev Idx": 3022}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422142, "dur": 0, "args": {"External id": 3024, "Ev Idx": 3023}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422142, "dur": 1, "args": {"External id": 3025, "Ev Idx": 3024}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422143, "dur": 0, "args": {"External id": 3026, "Ev Idx": 3025}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422143, "dur": 1, "args": {"External id": 3027, "Ev Idx": 3026}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422143, "dur": 0, "args": {"External id": 3028, "Ev Idx": 3027}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422144, "dur": 0, "args": {"External id": 3029, "Ev Idx": 3028}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422144, "dur": 0, "args": {"External id": 3030, "Ev Idx": 3029}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422144, "dur": 1, "args": {"External id": 3031, "Ev Idx": 3030}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422145, "dur": 0, "args": {"External id": 3032, "Ev Idx": 3031}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422145, "dur": 1, "args": {"External id": 3033, "Ev Idx": 3032}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422145, "dur": 1, "args": {"External id": 3034, "Ev Idx": 3033}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422146, "dur": 0, "args": {"External id": 3035, "Ev Idx": 3034}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422146, "dur": 0, "args": {"External id": 3036, "Ev Idx": 3035}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422147, "dur": 0, "args": {"External id": 3037, "Ev Idx": 3036}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422147, "dur": 0, "args": {"External id": 3038, "Ev Idx": 3037}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422147, "dur": 1, "args": {"External id": 3039, "Ev Idx": 3038}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422148, "dur": 0, "args": {"External id": 3040, "Ev Idx": 3039}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422148, "dur": 1, "args": {"External id": 3041, "Ev Idx": 3040}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422148, "dur": 1, "args": {"External id": 3042, "Ev Idx": 3041}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422149, "dur": 0, "args": {"External id": 3043, "Ev Idx": 3042}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422149, "dur": 0, "args": {"External id": 3044, "Ev Idx": 3043}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422150, "dur": 0, "args": {"External id": 3045, "Ev Idx": 3044}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422150, "dur": 0, "args": {"External id": 3046, "Ev Idx": 3045}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422150, "dur": 1, "args": {"External id": 3047, "Ev Idx": 3046}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422151, "dur": 0, "args": {"External id": 3048, "Ev Idx": 3047}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422151, "dur": 0, "args": {"External id": 3049, "Ev Idx": 3048}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422151, "dur": 0, "args": {"External id": 3050, "Ev Idx": 3049}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422152, "dur": 0, "args": {"External id": 3051, "Ev Idx": 3050}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422152, "dur": 0, "args": {"External id": 3052, "Ev Idx": 3051}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422153, "dur": 0, "args": {"External id": 3053, "Ev Idx": 3052}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422153, "dur": 0, "args": {"External id": 3054, "Ev Idx": 3053}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422153, "dur": 1, "args": {"External id": 3055, "Ev Idx": 3054}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422154, "dur": 0, "args": {"External id": 3056, "Ev Idx": 3055}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422154, "dur": 1, "args": {"External id": 3057, "Ev Idx": 3056}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422154, "dur": 0, "args": {"External id": 3058, "Ev Idx": 3057}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422155, "dur": 0, "args": {"External id": 3059, "Ev Idx": 3058}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422155, "dur": 0, "args": {"External id": 3060, "Ev Idx": 3059}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422155, "dur": 1, "args": {"External id": 3061, "Ev Idx": 3060}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422156, "dur": 0, "args": {"External id": 3062, "Ev Idx": 3061}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422156, "dur": 1, "args": {"External id": 3063, "Ev Idx": 3062}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422156, "dur": 1, "args": {"External id": 3064, "Ev Idx": 3063}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422157, "dur": 1, "args": {"External id": 3065, "Ev Idx": 3064}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422157, "dur": 0, "args": {"External id": 3066, "Ev Idx": 3065}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422158, "dur": 0, "args": {"External id": 3067, "Ev Idx": 3066}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422158, "dur": 0, "args": {"External id": 3068, "Ev Idx": 3067}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422159, "dur": 0, "args": {"External id": 3069, "Ev Idx": 3068}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422159, "dur": 0, "args": {"External id": 3070, "Ev Idx": 3069}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422159, "dur": 1, "args": {"External id": 3071, "Ev Idx": 3070}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422159, "dur": 1, "args": {"External id": 3072, "Ev Idx": 3071}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422162, "dur": 1, "args": {"External id": 3073, "Ev Idx": 3072}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422162, "dur": 1, "args": {"External id": 3074, "Ev Idx": 3073}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422163, "dur": 0, "args": {"External id": 3075, "Ev Idx": 3074}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422163, "dur": 0, "args": {"External id": 3076, "Ev Idx": 3075}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422164, "dur": 0, "args": {"External id": 3077, "Ev Idx": 3076}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422164, "dur": 0, "args": {"External id": 3078, "Ev Idx": 3077}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422164, "dur": 1, "args": {"External id": 3079, "Ev Idx": 3078}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422165, "dur": 0, "args": {"External id": 3080, "Ev Idx": 3079}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422165, "dur": 1, "args": {"External id": 3081, "Ev Idx": 3080}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422166, "dur": 0, "args": {"External id": 3082, "Ev Idx": 3081}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422166, "dur": 1, "args": {"External id": 3083, "Ev Idx": 3082}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422167, "dur": 0, "args": {"External id": 3084, "Ev Idx": 3083}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422167, "dur": 0, "args": {"External id": 3085, "Ev Idx": 3084}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422167, "dur": 0, "args": {"External id": 3086, "Ev Idx": 3085}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422168, "dur": 0, "args": {"External id": 3087, "Ev Idx": 3086}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422168, "dur": 0, "args": {"External id": 3088, "Ev Idx": 3087}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422169, "dur": 0, "args": {"External id": 3089, "Ev Idx": 3088}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422169, "dur": 0, "args": {"External id": 3090, "Ev Idx": 3089}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422170, "dur": 0, "args": {"External id": 3091, "Ev Idx": 3090}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422170, "dur": 0, "args": {"External id": 3092, "Ev Idx": 3091}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422170, "dur": 1, "args": {"External id": 3093, "Ev Idx": 3092}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422171, "dur": 0, "args": {"External id": 3094, "Ev Idx": 3093}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422171, "dur": 0, "args": {"External id": 3095, "Ev Idx": 3094}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422171, "dur": 0, "args": {"External id": 3096, "Ev Idx": 3095}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422172, "dur": 0, "args": {"External id": 3097, "Ev Idx": 3096}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422172, "dur": 0, "args": {"External id": 3098, "Ev Idx": 3097}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422172, "dur": 1, "args": {"External id": 3099, "Ev Idx": 3098}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422173, "dur": 0, "args": {"External id": 3100, "Ev Idx": 3099}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422173, "dur": 1, "args": {"External id": 3101, "Ev Idx": 3100}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422173, "dur": 0, "args": {"External id": 3102, "Ev Idx": 3101}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422174, "dur": 0, "args": {"External id": 3103, "Ev Idx": 3102}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422174, "dur": 0, "args": {"External id": 3104, "Ev Idx": 3103}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422174, "dur": 1, "args": {"External id": 3105, "Ev Idx": 3104}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422175, "dur": 0, "args": {"External id": 3106, "Ev Idx": 3105}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422175, "dur": 1, "args": {"External id": 3107, "Ev Idx": 3106}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422175, "dur": 1, "args": {"External id": 3108, "Ev Idx": 3107}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422176, "dur": 0, "args": {"External id": 3109, "Ev Idx": 3108}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422176, "dur": 0, "args": {"External id": 3110, "Ev Idx": 3109}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422177, "dur": 0, "args": {"External id": 3111, "Ev Idx": 3110}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422177, "dur": 0, "args": {"External id": 3112, "Ev Idx": 3111}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422177, "dur": 1, "args": {"External id": 3113, "Ev Idx": 3112}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422178, "dur": 0, "args": {"External id": 3114, "Ev Idx": 3113}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422178, "dur": 1, "args": {"External id": 3115, "Ev Idx": 3114}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422178, "dur": 1, "args": {"External id": 3116, "Ev Idx": 3115}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422179, "dur": 0, "args": {"External id": 3117, "Ev Idx": 3116}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422179, "dur": 0, "args": {"External id": 3118, "Ev Idx": 3117}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422180, "dur": 0, "args": {"External id": 3119, "Ev Idx": 3118}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422180, "dur": 0, "args": {"External id": 3120, "Ev Idx": 3119}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422180, "dur": 1, "args": {"External id": 3121, "Ev Idx": 3120}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422180, "dur": 1, "args": {"External id": 3122, "Ev Idx": 3121}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422181, "dur": 0, "args": {"External id": 3123, "Ev Idx": 3122}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422181, "dur": 0, "args": {"External id": 3124, "Ev Idx": 3123}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422182, "dur": 0, "args": {"External id": 3125, "Ev Idx": 3124}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422182, "dur": 0, "args": {"External id": 3126, "Ev Idx": 3125}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422182, "dur": 1, "args": {"External id": 3127, "Ev Idx": 3126}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422183, "dur": 0, "args": {"External id": 3128, "Ev Idx": 3127}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422183, "dur": 0, "args": {"External id": 3129, "Ev Idx": 3128}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422183, "dur": 0, "args": {"External id": 3130, "Ev Idx": 3129}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422184, "dur": 0, "args": {"External id": 3131, "Ev Idx": 3130}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422184, "dur": 0, "args": {"External id": 3132, "Ev Idx": 3131}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422184, "dur": 1, "args": {"External id": 3133, "Ev Idx": 3132}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422185, "dur": 0, "args": {"External id": 3134, "Ev Idx": 3133}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422185, "dur": 1, "args": {"External id": 3135, "Ev Idx": 3134}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422185, "dur": 0, "args": {"External id": 3136, "Ev Idx": 3135}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unbind", "pid": 494, "tid": 494, "ts": 1742522672422223, "dur": 92, "args": {"External id": 3137, "Ev Idx": 3136}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422223, "dur": 1, "args": {"External id": 3138, "Ev Idx": 3137}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422223, "dur": 1, "args": {"External id": 3139, "Ev Idx": 3138}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422224, "dur": 1, "args": {"External id": 3140, "Ev Idx": 3139}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422224, "dur": 1, "args": {"External id": 3141, "Ev Idx": 3140}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422225, "dur": 0, "args": {"External id": 3142, "Ev Idx": 3141}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422225, "dur": 0, "args": {"External id": 3143, "Ev Idx": 3142}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422226, "dur": 0, "args": {"External id": 3144, "Ev Idx": 3143}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422226, "dur": 0, "args": {"External id": 3145, "Ev Idx": 3144}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422226, "dur": 1, "args": {"External id": 3146, "Ev Idx": 3145}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422227, "dur": 0, "args": {"External id": 3147, "Ev Idx": 3146}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422227, "dur": 1, "args": {"External id": 3148, "Ev Idx": 3147}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422227, "dur": 1, "args": {"External id": 3149, "Ev Idx": 3148}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422228, "dur": 0, "args": {"External id": 3150, "Ev Idx": 3149}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422228, "dur": 0, "args": {"External id": 3151, "Ev Idx": 3150}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422229, "dur": 0, "args": {"External id": 3152, "Ev Idx": 3151}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422229, "dur": 0, "args": {"External id": 3153, "Ev Idx": 3152}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422229, "dur": 1, "args": {"External id": 3154, "Ev Idx": 3153}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422230, "dur": 0, "args": {"External id": 3155, "Ev Idx": 3154}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422230, "dur": 1, "args": {"External id": 3156, "Ev Idx": 3155}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422230, "dur": 0, "args": {"External id": 3157, "Ev Idx": 3156}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422231, "dur": 0, "args": {"External id": 3158, "Ev Idx": 3157}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422231, "dur": 0, "args": {"External id": 3159, "Ev Idx": 3158}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422232, "dur": 0, "args": {"External id": 3160, "Ev Idx": 3159}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422232, "dur": 0, "args": {"External id": 3161, "Ev Idx": 3160}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422232, "dur": 1, "args": {"External id": 3162, "Ev Idx": 3161}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422233, "dur": 0, "args": {"External id": 3163, "Ev Idx": 3162}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422233, "dur": 0, "args": {"External id": 3164, "Ev Idx": 3163}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422233, "dur": 0, "args": {"External id": 3165, "Ev Idx": 3164}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422234, "dur": 0, "args": {"External id": 3166, "Ev Idx": 3165}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422234, "dur": 0, "args": {"External id": 3167, "Ev Idx": 3166}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422234, "dur": 1, "args": {"External id": 3168, "Ev Idx": 3167}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422235, "dur": 0, "args": {"External id": 3169, "Ev Idx": 3168}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422235, "dur": 1, "args": {"External id": 3170, "Ev Idx": 3169}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422235, "dur": 1, "args": {"External id": 3171, "Ev Idx": 3170}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422236, "dur": 0, "args": {"External id": 3172, "Ev Idx": 3171}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422236, "dur": 0, "args": {"External id": 3173, "Ev Idx": 3172}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422237, "dur": 0, "args": {"External id": 3174, "Ev Idx": 3173}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422237, "dur": 0, "args": {"External id": 3175, "Ev Idx": 3174}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422237, "dur": 1, "args": {"External id": 3176, "Ev Idx": 3175}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422238, "dur": 0, "args": {"External id": 3177, "Ev Idx": 3176}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422238, "dur": 1, "args": {"External id": 3178, "Ev Idx": 3177}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422238, "dur": 0, "args": {"External id": 3179, "Ev Idx": 3178}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422239, "dur": 0, "args": {"External id": 3180, "Ev Idx": 3179}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422239, "dur": 0, "args": {"External id": 3181, "Ev Idx": 3180}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422239, "dur": 1, "args": {"External id": 3182, "Ev Idx": 3181}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422240, "dur": 0, "args": {"External id": 3183, "Ev Idx": 3182}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422240, "dur": 1, "args": {"External id": 3184, "Ev Idx": 3183}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422240, "dur": 1, "args": {"External id": 3185, "Ev Idx": 3184}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422241, "dur": 0, "args": {"External id": 3186, "Ev Idx": 3185}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422241, "dur": 0, "args": {"External id": 3187, "Ev Idx": 3186}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422242, "dur": 0, "args": {"External id": 3188, "Ev Idx": 3187}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422242, "dur": 0, "args": {"External id": 3189, "Ev Idx": 3188}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422242, "dur": 1, "args": {"External id": 3190, "Ev Idx": 3189}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422243, "dur": 0, "args": {"External id": 3191, "Ev Idx": 3190}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422243, "dur": 0, "args": {"External id": 3192, "Ev Idx": 3191}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422243, "dur": 0, "args": {"External id": 3193, "Ev Idx": 3192}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422244, "dur": 0, "args": {"External id": 3194, "Ev Idx": 3193}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422244, "dur": 0, "args": {"External id": 3195, "Ev Idx": 3194}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422244, "dur": 1, "args": {"External id": 3196, "Ev Idx": 3195}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422245, "dur": 0, "args": {"External id": 3197, "Ev Idx": 3196}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422245, "dur": 1, "args": {"External id": 3198, "Ev Idx": 3197}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422245, "dur": 0, "args": {"External id": 3199, "Ev Idx": 3198}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422246, "dur": 0, "args": {"External id": 3200, "Ev Idx": 3199}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422246, "dur": 0, "args": {"External id": 3201, "Ev Idx": 3200}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422247, "dur": 0, "args": {"External id": 3202, "Ev Idx": 3201}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422247, "dur": 0, "args": {"External id": 3203, "Ev Idx": 3202}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422247, "dur": 1, "args": {"External id": 3204, "Ev Idx": 3203}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422248, "dur": 0, "args": {"External id": 3205, "Ev Idx": 3204}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422248, "dur": 0, "args": {"External id": 3206, "Ev Idx": 3205}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422248, "dur": 0, "args": {"External id": 3207, "Ev Idx": 3206}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422249, "dur": 0, "args": {"External id": 3208, "Ev Idx": 3207}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422249, "dur": 0, "args": {"External id": 3209, "Ev Idx": 3208}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422249, "dur": 1, "args": {"External id": 3210, "Ev Idx": 3209}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422250, "dur": 0, "args": {"External id": 3211, "Ev Idx": 3210}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422250, "dur": 1, "args": {"External id": 3212, "Ev Idx": 3211}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422250, "dur": 0, "args": {"External id": 3213, "Ev Idx": 3212}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422251, "dur": 0, "args": {"External id": 3214, "Ev Idx": 3213}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422251, "dur": 0, "args": {"External id": 3215, "Ev Idx": 3214}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422251, "dur": 1, "args": {"External id": 3216, "Ev Idx": 3215}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422252, "dur": 0, "args": {"External id": 3217, "Ev Idx": 3216}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422252, "dur": 1, "args": {"External id": 3218, "Ev Idx": 3217}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422253, "dur": 0, "args": {"External id": 3219, "Ev Idx": 3218}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422253, "dur": 0, "args": {"External id": 3220, "Ev Idx": 3219}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422253, "dur": 0, "args": {"External id": 3221, "Ev Idx": 3220}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422254, "dur": 0, "args": {"External id": 3222, "Ev Idx": 3221}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422254, "dur": 0, "args": {"External id": 3223, "Ev Idx": 3222}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422254, "dur": 1, "args": {"External id": 3224, "Ev Idx": 3223}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422255, "dur": 0, "args": {"External id": 3225, "Ev Idx": 3224}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422255, "dur": 1, "args": {"External id": 3226, "Ev Idx": 3225}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422255, "dur": 0, "args": {"External id": 3227, "Ev Idx": 3226}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422256, "dur": 0, "args": {"External id": 3228, "Ev Idx": 3227}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422256, "dur": 0, "args": {"External id": 3229, "Ev Idx": 3228}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422257, "dur": 0, "args": {"External id": 3230, "Ev Idx": 3229}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422257, "dur": 0, "args": {"External id": 3231, "Ev Idx": 3230}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422257, "dur": 1, "args": {"External id": 3232, "Ev Idx": 3231}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422258, "dur": 0, "args": {"External id": 3233, "Ev Idx": 3232}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422258, "dur": 0, "args": {"External id": 3234, "Ev Idx": 3233}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422258, "dur": 0, "args": {"External id": 3235, "Ev Idx": 3234}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422259, "dur": 0, "args": {"External id": 3236, "Ev Idx": 3235}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422259, "dur": 0, "args": {"External id": 3237, "Ev Idx": 3236}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422259, "dur": 1, "args": {"External id": 3238, "Ev Idx": 3237}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422260, "dur": 0, "args": {"External id": 3239, "Ev Idx": 3238}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422260, "dur": 1, "args": {"External id": 3240, "Ev Idx": 3239}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422260, "dur": 0, "args": {"External id": 3241, "Ev Idx": 3240}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422261, "dur": 0, "args": {"External id": 3242, "Ev Idx": 3241}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422261, "dur": 0, "args": {"External id": 3243, "Ev Idx": 3242}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422262, "dur": 0, "args": {"External id": 3244, "Ev Idx": 3243}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422262, "dur": 0, "args": {"External id": 3245, "Ev Idx": 3244}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422262, "dur": 1, "args": {"External id": 3246, "Ev Idx": 3245}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422262, "dur": 1, "args": {"External id": 3247, "Ev Idx": 3246}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422263, "dur": 0, "args": {"External id": 3248, "Ev Idx": 3247}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422263, "dur": 0, "args": {"External id": 3249, "Ev Idx": 3248}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422264, "dur": 0, "args": {"External id": 3250, "Ev Idx": 3249}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422264, "dur": 0, "args": {"External id": 3251, "Ev Idx": 3250}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422264, "dur": 1, "args": {"External id": 3252, "Ev Idx": 3251}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422265, "dur": 0, "args": {"External id": 3253, "Ev Idx": 3252}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422265, "dur": 1, "args": {"External id": 3254, "Ev Idx": 3253}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422265, "dur": 0, "args": {"External id": 3255, "Ev Idx": 3254}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422266, "dur": 0, "args": {"External id": 3256, "Ev Idx": 3255}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422266, "dur": 0, "args": {"External id": 3257, "Ev Idx": 3256}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422266, "dur": 1, "args": {"External id": 3258, "Ev Idx": 3257}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422267, "dur": 0, "args": {"External id": 3259, "Ev Idx": 3258}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422267, "dur": 1, "args": {"External id": 3260, "Ev Idx": 3259}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422267, "dur": 1, "args": {"External id": 3261, "Ev Idx": 3260}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422268, "dur": 0, "args": {"External id": 3262, "Ev Idx": 3261}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422268, "dur": 0, "args": {"External id": 3263, "Ev Idx": 3262}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422269, "dur": 0, "args": {"External id": 3264, "Ev Idx": 3263}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422269, "dur": 0, "args": {"External id": 3265, "Ev Idx": 3264}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422269, "dur": 1, "args": {"External id": 3266, "Ev Idx": 3265}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422270, "dur": 0, "args": {"External id": 3267, "Ev Idx": 3266}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422270, "dur": 0, "args": {"External id": 3268, "Ev Idx": 3267}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422270, "dur": 0, "args": {"External id": 3269, "Ev Idx": 3268}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422271, "dur": 0, "args": {"External id": 3270, "Ev Idx": 3269}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422271, "dur": 0, "args": {"External id": 3271, "Ev Idx": 3270}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422271, "dur": 1, "args": {"External id": 3272, "Ev Idx": 3271}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422272, "dur": 0, "args": {"External id": 3273, "Ev Idx": 3272}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422272, "dur": 1, "args": {"External id": 3274, "Ev Idx": 3273}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422272, "dur": 0, "args": {"External id": 3275, "Ev Idx": 3274}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422273, "dur": 0, "args": {"External id": 3276, "Ev Idx": 3275}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422273, "dur": 0, "args": {"External id": 3277, "Ev Idx": 3276}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422274, "dur": 0, "args": {"External id": 3278, "Ev Idx": 3277}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422274, "dur": 0, "args": {"External id": 3279, "Ev Idx": 3278}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422274, "dur": 1, "args": {"External id": 3280, "Ev Idx": 3279}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422275, "dur": 0, "args": {"External id": 3281, "Ev Idx": 3280}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422275, "dur": 1, "args": {"External id": 3282, "Ev Idx": 3281}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422275, "dur": 0, "args": {"External id": 3283, "Ev Idx": 3282}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422276, "dur": 0, "args": {"External id": 3284, "Ev Idx": 3283}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422276, "dur": 0, "args": {"External id": 3285, "Ev Idx": 3284}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422276, "dur": 1, "args": {"External id": 3286, "Ev Idx": 3285}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422277, "dur": 0, "args": {"External id": 3287, "Ev Idx": 3286}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422277, "dur": 1, "args": {"External id": 3288, "Ev Idx": 3287}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422277, "dur": 1, "args": {"External id": 3289, "Ev Idx": 3288}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422278, "dur": 0, "args": {"External id": 3290, "Ev Idx": 3289}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422278, "dur": 0, "args": {"External id": 3291, "Ev Idx": 3290}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422279, "dur": 0, "args": {"External id": 3292, "Ev Idx": 3291}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422279, "dur": 0, "args": {"External id": 3293, "Ev Idx": 3292}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422279, "dur": 1, "args": {"External id": 3294, "Ev Idx": 3293}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422280, "dur": 0, "args": {"External id": 3295, "Ev Idx": 3294}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422280, "dur": 1, "args": {"External id": 3296, "Ev Idx": 3295}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422280, "dur": 1, "args": {"External id": 3297, "Ev Idx": 3296}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422281, "dur": 0, "args": {"External id": 3298, "Ev Idx": 3297}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422281, "dur": 0, "args": {"External id": 3299, "Ev Idx": 3298}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422282, "dur": 0, "args": {"External id": 3300, "Ev Idx": 3299}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422282, "dur": 0, "args": {"External id": 3301, "Ev Idx": 3300}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422282, "dur": 1, "args": {"External id": 3302, "Ev Idx": 3301}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422283, "dur": 0, "args": {"External id": 3303, "Ev Idx": 3302}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422283, "dur": 1, "args": {"External id": 3304, "Ev Idx": 3303}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422283, "dur": 1, "args": {"External id": 3305, "Ev Idx": 3304}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422284, "dur": 0, "args": {"External id": 3306, "Ev Idx": 3305}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422284, "dur": 0, "args": {"External id": 3307, "Ev Idx": 3306}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422285, "dur": 0, "args": {"External id": 3308, "Ev Idx": 3307}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422285, "dur": 0, "args": {"External id": 3309, "Ev Idx": 3308}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422285, "dur": 1, "args": {"External id": 3310, "Ev Idx": 3309}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422286, "dur": 0, "args": {"External id": 3311, "Ev Idx": 3310}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422286, "dur": 1, "args": {"External id": 3312, "Ev Idx": 3311}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422286, "dur": 0, "args": {"External id": 3313, "Ev Idx": 3312}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422287, "dur": 0, "args": {"External id": 3314, "Ev Idx": 3313}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422287, "dur": 0, "args": {"External id": 3315, "Ev Idx": 3314}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422288, "dur": 0, "args": {"External id": 3316, "Ev Idx": 3315}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422288, "dur": 0, "args": {"External id": 3317, "Ev Idx": 3316}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422288, "dur": 1, "args": {"External id": 3318, "Ev Idx": 3317}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422289, "dur": 0, "args": {"External id": 3319, "Ev Idx": 3318}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422289, "dur": 1, "args": {"External id": 3320, "Ev Idx": 3319}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422289, "dur": 0, "args": {"External id": 3321, "Ev Idx": 3320}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422290, "dur": 0, "args": {"External id": 3322, "Ev Idx": 3321}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422290, "dur": 0, "args": {"External id": 3323, "Ev Idx": 3322}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422291, "dur": 0, "args": {"External id": 3324, "Ev Idx": 3323}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422291, "dur": 0, "args": {"External id": 3325, "Ev Idx": 3324}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422291, "dur": 1, "args": {"External id": 3326, "Ev Idx": 3325}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422292, "dur": 0, "args": {"External id": 3327, "Ev Idx": 3326}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422292, "dur": 1, "args": {"External id": 3328, "Ev Idx": 3327}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422292, "dur": 0, "args": {"External id": 3329, "Ev Idx": 3328}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422293, "dur": 0, "args": {"External id": 3330, "Ev Idx": 3329}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422293, "dur": 0, "args": {"External id": 3331, "Ev Idx": 3330}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422293, "dur": 1, "args": {"External id": 3332, "Ev Idx": 3331}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422294, "dur": 0, "args": {"External id": 3333, "Ev Idx": 3332}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422294, "dur": 1, "args": {"External id": 3334, "Ev Idx": 3333}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422294, "dur": 0, "args": {"External id": 3335, "Ev Idx": 3334}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422295, "dur": 0, "args": {"External id": 3336, "Ev Idx": 3335}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422295, "dur": 0, "args": {"External id": 3337, "Ev Idx": 3336}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422296, "dur": 0, "args": {"External id": 3338, "Ev Idx": 3337}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422296, "dur": 0, "args": {"External id": 3339, "Ev Idx": 3338}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422296, "dur": 1, "args": {"External id": 3340, "Ev Idx": 3339}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422296, "dur": 1, "args": {"External id": 3341, "Ev Idx": 3340}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422297, "dur": 0, "args": {"External id": 3342, "Ev Idx": 3341}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422297, "dur": 0, "args": {"External id": 3343, "Ev Idx": 3342}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422298, "dur": 0, "args": {"External id": 3344, "Ev Idx": 3343}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422298, "dur": 0, "args": {"External id": 3345, "Ev Idx": 3344}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422298, "dur": 1, "args": {"External id": 3346, "Ev Idx": 3345}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422299, "dur": 0, "args": {"External id": 3347, "Ev Idx": 3346}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422299, "dur": 1, "args": {"External id": 3348, "Ev Idx": 3347}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422299, "dur": 1, "args": {"External id": 3349, "Ev Idx": 3348}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422300, "dur": 0, "args": {"External id": 3350, "Ev Idx": 3349}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422300, "dur": 0, "args": {"External id": 3351, "Ev Idx": 3350}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422301, "dur": 0, "args": {"External id": 3352, "Ev Idx": 3351}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422301, "dur": 0, "args": {"External id": 3353, "Ev Idx": 3352}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422301, "dur": 1, "args": {"External id": 3354, "Ev Idx": 3353}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422302, "dur": 0, "args": {"External id": 3355, "Ev Idx": 3354}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422302, "dur": 0, "args": {"External id": 3356, "Ev Idx": 3355}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422302, "dur": 0, "args": {"External id": 3357, "Ev Idx": 3356}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422303, "dur": 0, "args": {"External id": 3358, "Ev Idx": 3357}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422303, "dur": 0, "args": {"External id": 3359, "Ev Idx": 3358}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422303, "dur": 1, "args": {"External id": 3360, "Ev Idx": 3359}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422304, "dur": 0, "args": {"External id": 3361, "Ev Idx": 3360}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422304, "dur": 1, "args": {"External id": 3362, "Ev Idx": 3361}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422304, "dur": 0, "args": {"External id": 3363, "Ev Idx": 3362}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422305, "dur": 0, "args": {"External id": 3364, "Ev Idx": 3363}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422305, "dur": 0, "args": {"External id": 3365, "Ev Idx": 3364}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422305, "dur": 1, "args": {"External id": 3366, "Ev Idx": 3365}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422306, "dur": 0, "args": {"External id": 3367, "Ev Idx": 3366}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422306, "dur": 1, "args": {"External id": 3368, "Ev Idx": 3367}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422306, "dur": 1, "args": {"External id": 3369, "Ev Idx": 3368}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422307, "dur": 1, "args": {"External id": 3370, "Ev Idx": 3369}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422307, "dur": 0, "args": {"External id": 3371, "Ev Idx": 3370}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422308, "dur": 0, "args": {"External id": 3372, "Ev Idx": 3371}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422308, "dur": 0, "args": {"External id": 3373, "Ev Idx": 3372}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422309, "dur": 0, "args": {"External id": 3374, "Ev Idx": 3373}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422309, "dur": 0, "args": {"External id": 3375, "Ev Idx": 3374}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422309, "dur": 1, "args": {"External id": 3376, "Ev Idx": 3375}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422310, "dur": 0, "args": {"External id": 3377, "Ev Idx": 3376}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422310, "dur": 0, "args": {"External id": 3378, "Ev Idx": 3377}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422310, "dur": 0, "args": {"External id": 3379, "Ev Idx": 3378}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422311, "dur": 0, "args": {"External id": 3380, "Ev Idx": 3379}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422311, "dur": 0, "args": {"External id": 3381, "Ev Idx": 3380}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422311, "dur": 1, "args": {"External id": 3382, "Ev Idx": 3381}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422312, "dur": 0, "args": {"External id": 3383, "Ev Idx": 3382}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422312, "dur": 1, "args": {"External id": 3384, "Ev Idx": 3383}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422312, "dur": 0, "args": {"External id": 3385, "Ev Idx": 3384}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422313, "dur": 0, "args": {"External id": 3386, "Ev Idx": 3385}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422313, "dur": 0, "args": {"External id": 3387, "Ev Idx": 3386}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422313, "dur": 1, "args": {"External id": 3388, "Ev Idx": 3387}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422314, "dur": 0, "args": {"External id": 3389, "Ev Idx": 3388}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422314, "dur": 1, "args": {"External id": 3390, "Ev Idx": 3389}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422314, "dur": 1, "args": {"External id": 3391, "Ev Idx": 3390}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422315, "dur": 0, "args": {"External id": 3392, "Ev Idx": 3391}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422315, "dur": 0, "args": {"External id": 3393, "Ev Idx": 3392}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672422357, "dur": 3, "args": {"External id": 3394, "Ev Idx": 3393}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672422359, "dur": 1, "args": {"External id": 3395, "Ev Idx": 3394}}, {"ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 494, "tid": 494, "ts": 1742522672422372, "dur": 64, "args": {"External id": 3396, "Ev Idx": 3395}}, {"ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 494, "tid": 494, "ts": 1742522672422378, "dur": 56, "args": {"External id": 3397, "Ev Idx": 3396}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672422435, "dur": 1, "args": {"External id": 3398, "Ev Idx": 3397}}, {"ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 494, "tid": 494, "ts": 1742522672422443, "dur": 39, "args": {"External id": 3399, "Ev Idx": 3398}}, {"ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 494, "tid": 494, "ts": 1742522672422445, "dur": 36, "args": {"External id": 3400, "Ev Idx": 3399}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672422482, "dur": 0, "args": {"External id": 3401, "Ev Idx": 3400}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672422494, "dur": 3, "args": {"External id": 3402, "Ev Idx": 3401}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672422500, "dur": 3, "args": {"External id": 3403, "Ev Idx": 3402}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672422526, "dur": 3, "args": {"External id": 3404, "Ev Idx": 3403}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672422529, "dur": 3, "args": {"External id": 3405, "Ev Idx": 3404}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sort", "pid": 494, "tid": 494, "ts": 1742522672422550, "dur": 87, "args": {"External id": 3406, "Ev Idx": 3405}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sort", "pid": 494, "tid": 494, "ts": 1742522672422552, "dur": 85, "args": {"External id": 3407, "Ev Idx": 3406}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672422559, "dur": 17, "args": {"External id": 3408, "Ev Idx": 3407}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672422579, "dur": 23, "args": {"External id": 3409, "Ev Idx": 3408}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672422583, "dur": 2, "args": {"External id": 3410, "Ev Idx": 3409}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672422587, "dur": 15, "args": {"External id": 3411, "Ev Idx": 3410}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672422591, "dur": 3, "args": {"External id": 3412, "Ev Idx": 3411}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672422603, "dur": 1, "args": {"External id": 3413, "Ev Idx": 3412}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672422604, "dur": 11, "args": {"External id": 3414, "Ev Idx": 3413}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672422617, "dur": 7, "args": {"External id": 3415, "Ev Idx": 3414}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672422656, "dur": 11, "args": {"External id": 3416, "Ev Idx": 3415}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672422657, "dur": 2, "args": {"External id": 3417, "Ev Idx": 3416}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672422659, "dur": 8, "args": {"External id": 3418, "Ev Idx": 3417}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672422660, "dur": 2, "args": {"External id": 3419, "Ev Idx": 3418}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 494, "tid": 494, "ts": 1742522672422671, "dur": 3, "args": {"External id": 3420, "Ev Idx": 3419}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 494, "tid": 494, "ts": 1742522672422671, "dur": 3, "args": {"External id": 3421, "Ev Idx": 3420}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422673, "dur": 1, "args": {"External id": 3422, "Ev Idx": 3421}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672422677, "dur": 4, "args": {"External id": 3423, "Ev Idx": 3422}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672422677, "dur": 4, "args": {"External id": 3424, "Ev Idx": 3423}}, {"ph": "X", "cat": "cpu_op", "name": "aten::scatter_", "pid": 494, "tid": 494, "ts": 1742522672422685, "dur": 23, "args": {"External id": 3425, "Ev Idx": 3424}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422694, "dur": 0, "args": {"External id": 3426, "Ev Idx": 3425}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422694, "dur": 0, "args": {"External id": 3427, "Ev Idx": 3426}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672422714, "dur": 17, "args": {"External id": 3428, "Ev Idx": 3427}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422721, "dur": 0, "args": {"External id": 3429, "Ev Idx": 3428}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422722, "dur": 0, "args": {"External id": 3430, "Ev Idx": 3429}}, {"ph": "X", "cat": "cpu_op", "name": "aten::softmax", "pid": 494, "tid": 494, "ts": 1742522672422739, "dur": 18, "args": {"External id": 3431, "Ev Idx": 3430}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672422741, "dur": 0, "args": {"External id": 3432, "Ev Idx": 3431}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_softmax", "pid": 494, "tid": 494, "ts": 1742522672422743, "dur": 13, "args": {"External id": 3433, "Ev Idx": 3432}}, {"ph": "X", "cat": "cpu_op", "name": "aten::log", "pid": 494, "tid": 494, "ts": 1742522672422760, "dur": 16, "args": {"External id": 3434, "Ev Idx": 3433}}, {"ph": "X", "cat": "cpu_op", "name": "aten::full", "pid": 494, "tid": 494, "ts": 1742522672422787, "dur": 17, "args": {"External id": 3435, "Ev Idx": 3434}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672422789, "dur": 4, "args": {"External id": 3436, "Ev Idx": 3435}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672422793, "dur": 10, "args": {"External id": 3437, "Ev Idx": 3436}}, {"ph": "X", "cat": "cpu_op", "name": "aten::scatter_", "pid": 494, "tid": 494, "ts": 1742522672422805, "dur": 11, "args": {"External id": 3438, "Ev Idx": 3437}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422807, "dur": 0, "args": {"External id": 3439, "Ev Idx": 3438}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422808, "dur": 0, "args": {"External id": 3440, "Ev Idx": 3439}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672422821, "dur": 1, "args": {"External id": 3441, "Ev Idx": 3440}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422822, "dur": 0, "args": {"External id": 3442, "Ev Idx": 3441}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422825, "dur": 1, "args": {"External id": 3443, "Ev Idx": 3442}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422826, "dur": 0, "args": {"External id": 3444, "Ev Idx": 3443}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672422830, "dur": 0, "args": {"External id": 3445, "Ev Idx": 3444}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422830, "dur": 0, "args": {"External id": 3446, "Ev Idx": 3445}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422831, "dur": 0, "args": {"External id": 3447, "Ev Idx": 3446}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422831, "dur": 0, "args": {"External id": 3448, "Ev Idx": 3447}}, {"ph": "X", "cat": "cpu_op", "name": "aten::full", "pid": 494, "tid": 494, "ts": 1742522672422845, "dur": 11, "args": {"External id": 3449, "Ev Idx": 3448}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672422845, "dur": 4, "args": {"External id": 3450, "Ev Idx": 3449}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672422849, "dur": 7, "args": {"External id": 3451, "Ev Idx": 3450}}, {"ph": "X", "cat": "cpu_op", "name": "aten::scatter_", "pid": 494, "tid": 494, "ts": 1742522672422857, "dur": 9, "args": {"External id": 3452, "Ev Idx": 3451}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422859, "dur": 0, "args": {"External id": 3453, "Ev Idx": 3452}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422859, "dur": 1, "args": {"External id": 3454, "Ev Idx": 3453}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672422868, "dur": 1, "args": {"External id": 3455, "Ev Idx": 3454}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422869, "dur": 0, "args": {"External id": 3456, "Ev Idx": 3455}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672422875, "dur": 1, "args": {"External id": 3457, "Ev Idx": 3456}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422875, "dur": 1, "args": {"External id": 3458, "Ev Idx": 3457}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672422877, "dur": 10, "args": {"External id": 3459, "Ev Idx": 3458}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422880, "dur": 1, "args": {"External id": 3460, "Ev Idx": 3459}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422881, "dur": 0, "args": {"External id": 3461, "Ev Idx": 3460}}, {"ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 494, "tid": 494, "ts": 1742522672422891, "dur": 2, "args": {"External id": 3462, "Ev Idx": 3461}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422893, "dur": 0, "args": {"External id": 3463, "Ev Idx": 3462}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672422894, "dur": 1, "args": {"External id": 3464, "Ev Idx": 3463}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422894, "dur": 1, "args": {"External id": 3465, "Ev Idx": 3464}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672422896, "dur": 9, "args": {"External id": 3466, "Ev Idx": 3465}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422898, "dur": 1, "args": {"External id": 3467, "Ev Idx": 3466}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422899, "dur": 0, "args": {"External id": 3468, "Ev Idx": 3467}}, {"ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 494, "tid": 494, "ts": 1742522672422906, "dur": 1, "args": {"External id": 3469, "Ev Idx": 3468}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672422906, "dur": 1, "args": {"External id": 3470, "Ev Idx": 3469}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gt", "pid": 494, "tid": 494, "ts": 1742522672422912, "dur": 24, "args": {"External id": 3471, "Ev Idx": 3470}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 494, "tid": 494, "ts": 1742522672422942, "dur": 17, "args": {"External id": 3472, "Ev Idx": 3471}}, {"ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 494, "tid": 494, "ts": 1742522672422964, "dur": 18, "args": {"External id": 3473, "Ev Idx": 3472}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672422966, "dur": 0, "args": {"External id": 3474, "Ev Idx": 3473}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672422988, "dur": 39, "args": {"External id": 3475, "Ev Idx": 3474}}, {"ph": "X", "cat": "cpu_op", "name": "aten::scalar_tensor", "pid": 494, "tid": 494, "ts": 1742522672422990, "dur": 10, "args": {"External id": 3476, "Ev Idx": 3475}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672422991, "dur": 3, "args": {"External id": 3477, "Ev Idx": 3476}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672422994, "dur": 6, "args": {"External id": 3478, "Ev Idx": 3477}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672423001, "dur": 0, "args": {"External id": 3479, "Ev Idx": 3478}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672423003, "dur": 23, "args": {"External id": 3480, "Ev Idx": 3479}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423006, "dur": 2, "args": {"External id": 3481, "Ev Idx": 3480}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672423011, "dur": 3, "args": {"External id": 3482, "Ev Idx": 3481}}, {"ph": "X", "cat": "cpu_op", "name": "aten::exp", "pid": 494, "tid": 494, "ts": 1742522672423031, "dur": 13, "args": {"External id": 3483, "Ev Idx": 3482}}, {"ph": "X", "cat": "cpu_op", "name": "aten::rand_like", "pid": 494, "tid": 494, "ts": 1742522672423049, "dur": 26, "args": {"External id": 3484, "Ev Idx": 3483}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672423050, "dur": 4, "args": {"External id": 3485, "Ev Idx": 3484}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672423051, "dur": 3, "args": {"External id": 3486, "Ev Idx": 3485}}, {"ph": "X", "cat": "cpu_op", "name": "aten::uniform_", "pid": 494, "tid": 494, "ts": 1742522672423057, "dur": 18, "args": {"External id": 3487, "Ev Idx": 3486}}, {"ph": "X", "cat": "cpu_op", "name": "aten::lt", "pid": 494, "tid": 494, "ts": 1742522672423079, "dur": 13, "args": {"External id": 3488, "Ev Idx": 3487}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672423097, "dur": 12, "args": {"External id": 3489, "Ev Idx": 3488}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423101, "dur": 0, "args": {"External id": 3490, "Ev Idx": 3489}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423102, "dur": 0, "args": {"External id": 3491, "Ev Idx": 3490}}, {"ph": "X", "cat": "cpu_op", "name": "aten::exp", "pid": 494, "tid": 494, "ts": 1742522672423112, "dur": 8, "args": {"External id": 3492, "Ev Idx": 3491}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 494, "tid": 494, "ts": 1742522672423121, "dur": 9, "args": {"External id": 3493, "Ev Idx": 3492}}, {"ph": "X", "cat": "cpu_op", "name": "aten::relu_", "pid": 494, "tid": 494, "ts": 1742522672423134, "dur": 13, "args": {"External id": 3494, "Ev Idx": 3493}}, {"ph": "X", "cat": "cpu_op", "name": "aten::clamp_min_", "pid": 494, "tid": 494, "ts": 1742522672423137, "dur": 10, "args": {"External id": 3495, "Ev Idx": 3494}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 494, "tid": 494, "ts": 1742522672423152, "dur": 20, "args": {"External id": 3496, "Ev Idx": 3495}}, {"ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 494, "tid": 494, "ts": 1742522672423176, "dur": 17, "args": {"External id": 3497, "Ev Idx": 3496}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672423198, "dur": 3, "args": {"External id": 3498, "Ev Idx": 3497}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672423198, "dur": 3, "args": {"External id": 3499, "Ev Idx": 3498}}, {"ph": "X", "cat": "cpu_op", "name": "aten::exponential_", "pid": 494, "tid": 494, "ts": 1742522672423206, "dur": 14, "args": {"External id": 3500, "Ev Idx": 3499}}, {"ph": "X", "cat": "cpu_op", "name": "aten::div_", "pid": 494, "tid": 494, "ts": 1742522672423222, "dur": 9, "args": {"External id": 3501, "Ev Idx": 3500}}, {"ph": "X", "cat": "cpu_op", "name": "aten::argmax", "pid": 494, "tid": 494, "ts": 1742522672423235, "dur": 18, "args": {"External id": 3502, "Ev Idx": 3501}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423241, "dur": 0, "args": {"External id": 3503, "Ev Idx": 3502}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672423255, "dur": 0, "args": {"External id": 3504, "Ev Idx": 3503}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423255, "dur": 0, "args": {"External id": 3505, "Ev Idx": 3504}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672423256, "dur": 12, "args": {"External id": 3506, "Ev Idx": 3505}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423260, "dur": 0, "args": {"External id": 3507, "Ev Idx": 3506}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423260, "dur": 0, "args": {"External id": 3508, "Ev Idx": 3507}}, {"ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 494, "tid": 494, "ts": 1742522672423269, "dur": 2, "args": {"External id": 3509, "Ev Idx": 3508}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423270, "dur": 0, "args": {"External id": 3510, "Ev Idx": 3509}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672423279, "dur": 16, "args": {"External id": 3511, "Ev Idx": 3510}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423280, "dur": 2, "args": {"External id": 3512, "Ev Idx": 3511}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672423284, "dur": 2, "args": {"External id": 3513, "Ev Idx": 3512}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672423299, "dur": 1, "args": {"External id": 3514, "Ev Idx": 3513}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423300, "dur": 0, "args": {"External id": 3515, "Ev Idx": 3514}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672423301, "dur": 1, "args": {"External id": 3516, "Ev Idx": 3515}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423301, "dur": 1, "args": {"External id": 3517, "Ev Idx": 3516}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672423304, "dur": 9, "args": {"External id": 3518, "Ev Idx": 3517}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423306, "dur": 1, "args": {"External id": 3519, "Ev Idx": 3518}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423307, "dur": 0, "args": {"External id": 3520, "Ev Idx": 3519}}, {"ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 494, "tid": 494, "ts": 1742522672423315, "dur": 0, "args": {"External id": 3521, "Ev Idx": 3520}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423315, "dur": 0, "args": {"External id": 3522, "Ev Idx": 3521}}, {"ph": "X", "cat": "cpu_op", "name": "aten::topk", "pid": 494, "tid": 494, "ts": 1742522672423321, "dur": 48, "args": {"External id": 3523, "Ev Idx": 3522}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672423372, "dur": 11, "args": {"External id": 3524, "Ev Idx": 3523}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423375, "dur": 0, "args": {"External id": 3525, "Ev Idx": 3524}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423376, "dur": 0, "args": {"External id": 3526, "Ev Idx": 3525}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672423388, "dur": 29, "args": {"External id": 3527, "Ev Idx": 3526}}, {"ph": "X", "cat": "cpu_op", "name": "aten::scalar_tensor", "pid": 494, "tid": 494, "ts": 1742522672423389, "dur": 12, "args": {"External id": 3528, "Ev Idx": 3527}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423389, "dur": 3, "args": {"External id": 3529, "Ev Idx": 3528}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672423393, "dur": 8, "args": {"External id": 3530, "Ev Idx": 3529}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672423401, "dur": 1, "args": {"External id": 3531, "Ev Idx": 3530}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672423402, "dur": 15, "args": {"External id": 3532, "Ev Idx": 3531}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423403, "dur": 2, "args": {"External id": 3533, "Ev Idx": 3532}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672423406, "dur": 3, "args": {"External id": 3534, "Ev Idx": 3533}}, {"ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 494, "tid": 494, "ts": 1742522672423422, "dur": 17, "args": {"External id": 3535, "Ev Idx": 3534}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672423423, "dur": 3, "args": {"External id": 3536, "Ev Idx": 3535}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672423427, "dur": 12, "args": {"External id": 3537, "Ev Idx": 3536}}, {"ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 494, "tid": 494, "ts": 1742522672423442, "dur": 2, "args": {"External id": 3538, "Ev Idx": 3537}}, {"ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 494, "tid": 494, "ts": 1742522672423445, "dur": 9, "args": {"External id": 3539, "Ev Idx": 3538}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672423445, "dur": 2, "args": {"External id": 3540, "Ev Idx": 3539}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672423448, "dur": 6, "args": {"External id": 3541, "Ev Idx": 3540}}, {"ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 494, "tid": 494, "ts": 1742522672423455, "dur": 0, "args": {"External id": 3542, "Ev Idx": 3541}}, {"ph": "X", "cat": "cpu_op", "name": "aten::bitwise_not", "pid": 494, "tid": 494, "ts": 1742522672423460, "dur": 13, "args": {"External id": 3543, "Ev Idx": 3542}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672423477, "dur": 33, "args": {"External id": 3544, "Ev Idx": 3543}}, {"ph": "X", "cat": "cpu_op", "name": "aten::scalar_tensor", "pid": 494, "tid": 494, "ts": 1742522672423478, "dur": 9, "args": {"External id": 3545, "Ev Idx": 3544}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423478, "dur": 3, "args": {"External id": 3546, "Ev Idx": 3545}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672423482, "dur": 5, "args": {"External id": 3547, "Ev Idx": 3546}}, {"ph": "X", "cat": "cpu_op", "name": "aten::scalar_tensor", "pid": 494, "tid": 494, "ts": 1742522672423488, "dur": 8, "args": {"External id": 3548, "Ev Idx": 3547}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423488, "dur": 3, "args": {"External id": 3549, "Ev Idx": 3548}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672423491, "dur": 5, "args": {"External id": 3550, "Ev Idx": 3549}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672423497, "dur": 12, "args": {"External id": 3551, "Ev Idx": 3550}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423498, "dur": 2, "args": {"External id": 3552, "Ev Idx": 3551}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672423501, "dur": 2, "args": {"External id": 3553, "Ev Idx": 3552}}, {"ph": "X", "cat": "cpu_op", "name": "aten::bitwise_not", "pid": 494, "tid": 494, "ts": 1742522672423512, "dur": 7, "args": {"External id": 3554, "Ev Idx": 3553}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672423522, "dur": 2, "args": {"External id": 3555, "Ev Idx": 3554}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423523, "dur": 1, "args": {"External id": 3556, "Ev Idx": 3555}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672423525, "dur": 0, "args": {"External id": 3557, "Ev Idx": 3556}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423525, "dur": 0, "args": {"External id": 3558, "Ev Idx": 3557}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672423527, "dur": 28, "args": {"External id": 3559, "Ev Idx": 3558}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423528, "dur": 2, "args": {"External id": 3560, "Ev Idx": 3559}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672423531, "dur": 0, "args": {"External id": 3561, "Ev Idx": 3560}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672423531, "dur": 14, "args": {"External id": 3562, "Ev Idx": 3561}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672423532, "dur": 13, "args": {"External id": 3563, "Ev Idx": 3562}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672423533, "dur": 2, "args": {"External id": 3564, "Ev Idx": 3563}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672423536, "dur": 9, "args": {"External id": 3565, "Ev Idx": 3564}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672423546, "dur": 3, "args": {"External id": 3566, "Ev Idx": 3565}}, {"ph": "X", "cat": "cpu_op", "name": "aten::bitwise_not", "pid": 494, "tid": 494, "ts": 1742522672423558, "dur": 7, "args": {"External id": 3567, "Ev Idx": 3566}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672423567, "dur": 1, "args": {"External id": 3568, "Ev Idx": 3567}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423567, "dur": 1, "args": {"External id": 3569, "Ev Idx": 3568}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672423568, "dur": 1, "args": {"External id": 3570, "Ev Idx": 3569}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423569, "dur": 0, "args": {"External id": 3571, "Ev Idx": 3570}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672423570, "dur": 13, "args": {"External id": 3572, "Ev Idx": 3571}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423571, "dur": 2, "args": {"External id": 3573, "Ev Idx": 3572}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672423574, "dur": 2, "args": {"External id": 3574, "Ev Idx": 3573}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 494, "tid": 494, "ts": 1742522672423587, "dur": 26, "args": {"External id": 3575, "Ev Idx": 3574}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 494, "tid": 494, "ts": 1742522672423588, "dur": 25, "args": {"External id": 3576, "Ev Idx": 3575}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423591, "dur": 0, "args": {"External id": 3577, "Ev Idx": 3576}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672423591, "dur": 11, "args": {"External id": 3578, "Ev Idx": 3577}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672423591, "dur": 10, "args": {"External id": 3579, "Ev Idx": 3578}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672423592, "dur": 3, "args": {"External id": 3580, "Ev Idx": 3579}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672423595, "dur": 6, "args": {"External id": 3581, "Ev Idx": 3580}}, {"ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 494, "tid": 494, "ts": 1742522672423617, "dur": 27, "args": {"External id": 3582, "Ev Idx": 3581}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672423618, "dur": 1, "args": {"External id": 3583, "Ev Idx": 3582}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423619, "dur": 0, "args": {"External id": 3584, "Ev Idx": 3583}}, {"ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 494, "tid": 494, "ts": 1742522672423622, "dur": 22, "args": {"External id": 3585, "Ev Idx": 3584}}, {"ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 494, "tid": 494, "ts": 1742522672423629, "dur": 5, "args": {"External id": 3586, "Ev Idx": 3585}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672423633, "dur": 1, "args": {"External id": 3587, "Ev Idx": 3586}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423633, "dur": 1, "args": {"External id": 3588, "Ev Idx": 3587}}, {"ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 494, "tid": 494, "ts": 1742522672423646, "dur": 12, "args": {"External id": 3589, "Ev Idx": 3588}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672423647, "dur": 0, "args": {"External id": 3590, "Ev Idx": 3589}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423647, "dur": 0, "args": {"External id": 3591, "Ev Idx": 3590}}, {"ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 494, "tid": 494, "ts": 1742522672423648, "dur": 10, "args": {"External id": 3592, "Ev Idx": 3591}}, {"ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 494, "tid": 494, "ts": 1742522672423651, "dur": 1, "args": {"External id": 3593, "Ev Idx": 3592}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672423651, "dur": 1, "args": {"External id": 3594, "Ev Idx": 3593}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423651, "dur": 1, "args": {"External id": 3595, "Ev Idx": 3594}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672423662, "dur": 36, "args": {"External id": 3596, "Ev Idx": 3595}}, {"ph": "X", "cat": "cpu_op", "name": "aten::scalar_tensor", "pid": 494, "tid": 494, "ts": 1742522672423662, "dur": 21, "args": {"External id": 3597, "Ev Idx": 3596}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423663, "dur": 3, "args": {"External id": 3598, "Ev Idx": 3597}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672423666, "dur": 17, "args": {"External id": 3599, "Ev Idx": 3598}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672423683, "dur": 0, "args": {"External id": 3600, "Ev Idx": 3599}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672423684, "dur": 14, "args": {"External id": 3601, "Ev Idx": 3600}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423685, "dur": 2, "args": {"External id": 3602, "Ev Idx": 3601}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672423689, "dur": 2, "args": {"External id": 3603, "Ev Idx": 3602}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423710, "dur": 3, "args": {"External id": 3604, "Ev Idx": 3603}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423714, "dur": 2, "args": {"External id": 3605, "Ev Idx": 3604}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sort", "pid": 494, "tid": 494, "ts": 1742522672423726, "dur": 56, "args": {"External id": 3606, "Ev Idx": 3605}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sort", "pid": 494, "tid": 494, "ts": 1742522672423727, "dur": 54, "args": {"External id": 3607, "Ev Idx": 3606}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672423733, "dur": 11, "args": {"External id": 3608, "Ev Idx": 3607}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672423745, "dur": 14, "args": {"External id": 3609, "Ev Idx": 3608}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423746, "dur": 2, "args": {"External id": 3610, "Ev Idx": 3609}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672423749, "dur": 9, "args": {"External id": 3611, "Ev Idx": 3610}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672423750, "dur": 2, "args": {"External id": 3612, "Ev Idx": 3611}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672423759, "dur": 1, "args": {"External id": 3613, "Ev Idx": 3612}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672423761, "dur": 7, "args": {"External id": 3614, "Ev Idx": 3613}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672423769, "dur": 6, "args": {"External id": 3615, "Ev Idx": 3614}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672423795, "dur": 11, "args": {"External id": 3616, "Ev Idx": 3615}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672423795, "dur": 3, "args": {"External id": 3617, "Ev Idx": 3616}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672423798, "dur": 8, "args": {"External id": 3618, "Ev Idx": 3617}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672423799, "dur": 2, "args": {"External id": 3619, "Ev Idx": 3618}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 494, "tid": 494, "ts": 1742522672423808, "dur": 2, "args": {"External id": 3620, "Ev Idx": 3619}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 494, "tid": 494, "ts": 1742522672423808, "dur": 2, "args": {"External id": 3621, "Ev Idx": 3620}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423809, "dur": 1, "args": {"External id": 3622, "Ev Idx": 3621}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672423812, "dur": 5, "args": {"External id": 3623, "Ev Idx": 3622}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672423813, "dur": 3, "args": {"External id": 3624, "Ev Idx": 3623}}, {"ph": "X", "cat": "cpu_op", "name": "aten::scatter_", "pid": 494, "tid": 494, "ts": 1742522672423818, "dur": 12, "args": {"External id": 3625, "Ev Idx": 3624}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423821, "dur": 1, "args": {"External id": 3626, "Ev Idx": 3625}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423822, "dur": 0, "args": {"External id": 3627, "Ev Idx": 3626}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672423832, "dur": 11, "args": {"External id": 3628, "Ev Idx": 3627}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423836, "dur": 0, "args": {"External id": 3629, "Ev Idx": 3628}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423836, "dur": 1, "args": {"External id": 3630, "Ev Idx": 3629}}, {"ph": "X", "cat": "cpu_op", "name": "aten::softmax", "pid": 494, "tid": 494, "ts": 1742522672423848, "dur": 11, "args": {"External id": 3631, "Ev Idx": 3630}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672423849, "dur": 0, "args": {"External id": 3632, "Ev Idx": 3631}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_softmax", "pid": 494, "tid": 494, "ts": 1742522672423850, "dur": 8, "args": {"External id": 3633, "Ev Idx": 3632}}, {"ph": "X", "cat": "cpu_op", "name": "aten::log", "pid": 494, "tid": 494, "ts": 1742522672423867, "dur": 9, "args": {"External id": 3634, "Ev Idx": 3633}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672423878, "dur": 3, "args": {"External id": 3635, "Ev Idx": 3634}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672423878, "dur": 3, "args": {"External id": 3636, "Ev Idx": 3635}}, {"ph": "X", "cat": "cpu_op", "name": "aten::exponential_", "pid": 494, "tid": 494, "ts": 1742522672423883, "dur": 11, "args": {"External id": 3637, "Ev Idx": 3636}}, {"ph": "X", "cat": "cpu_op", "name": "aten::div_", "pid": 494, "tid": 494, "ts": 1742522672423896, "dur": 10, "args": {"External id": 3638, "Ev Idx": 3637}}, {"ph": "X", "cat": "cpu_op", "name": "aten::argmax", "pid": 494, "tid": 494, "ts": 1742522672423909, "dur": 13, "args": {"External id": 3639, "Ev Idx": 3638}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423913, "dur": 0, "args": {"External id": 3640, "Ev Idx": 3639}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672423926, "dur": 2, "args": {"External id": 3641, "Ev Idx": 3640}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423927, "dur": 0, "args": {"External id": 3642, "Ev Idx": 3641}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672423929, "dur": 0, "args": {"External id": 3643, "Ev Idx": 3642}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423929, "dur": 0, "args": {"External id": 3644, "Ev Idx": 3643}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672423932, "dur": 9, "args": {"External id": 3645, "Ev Idx": 3644}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423934, "dur": 0, "args": {"External id": 3646, "Ev Idx": 3645}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423935, "dur": 0, "args": {"External id": 3647, "Ev Idx": 3646}}, {"ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 494, "tid": 494, "ts": 1742522672423943, "dur": 1, "args": {"External id": 3648, "Ev Idx": 3647}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423944, "dur": 0, "args": {"External id": 3649, "Ev Idx": 3648}}, {"ph": "X", "cat": "cpu_op", "name": "aten::topk", "pid": 494, "tid": 494, "ts": 1742522672423947, "dur": 28, "args": {"External id": 3650, "Ev Idx": 3649}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672423978, "dur": 0, "args": {"External id": 3651, "Ev Idx": 3650}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423978, "dur": 0, "args": {"External id": 3652, "Ev Idx": 3651}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672423980, "dur": 9, "args": {"External id": 3653, "Ev Idx": 3652}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423982, "dur": 0, "args": {"External id": 3654, "Ev Idx": 3653}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423983, "dur": 0, "args": {"External id": 3655, "Ev Idx": 3654}}, {"ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 494, "tid": 494, "ts": 1742522672423991, "dur": 0, "args": {"External id": 3656, "Ev Idx": 3655}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423991, "dur": 0, "args": {"External id": 3657, "Ev Idx": 3656}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672423993, "dur": 9, "args": {"External id": 3658, "Ev Idx": 3657}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423996, "dur": 0, "args": {"External id": 3659, "Ev Idx": 3658}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672423996, "dur": 0, "args": {"External id": 3660, "Ev Idx": 3659}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424005, "dur": 1, "args": {"External id": 3661, "Ev Idx": 3660}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424005, "dur": 1, "args": {"External id": 3662, "Ev Idx": 3661}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424006, "dur": 1, "args": {"External id": 3663, "Ev Idx": 3662}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424007, "dur": 0, "args": {"External id": 3664, "Ev Idx": 3663}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672424009, "dur": 15, "args": {"External id": 3665, "Ev Idx": 3664}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424010, "dur": 2, "args": {"External id": 3666, "Ev Idx": 3665}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672424014, "dur": 3, "args": {"External id": 3667, "Ev Idx": 3666}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424027, "dur": 0, "args": {"External id": 3668, "Ev Idx": 3667}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424027, "dur": 0, "args": {"External id": 3669, "Ev Idx": 3668}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424028, "dur": 0, "args": {"External id": 3670, "Ev Idx": 3669}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424028, "dur": 0, "args": {"External id": 3671, "Ev Idx": 3670}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672424029, "dur": 13, "args": {"External id": 3672, "Ev Idx": 3671}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424030, "dur": 2, "args": {"External id": 3673, "Ev Idx": 3672}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672424033, "dur": 2, "args": {"External id": 3674, "Ev Idx": 3673}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672424044, "dur": 11, "args": {"External id": 3675, "Ev Idx": 3674}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424045, "dur": 1, "args": {"External id": 3676, "Ev Idx": 3675}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672424047, "dur": 2, "args": {"External id": 3677, "Ev Idx": 3676}}, {"ph": "X", "cat": "cpu_op", "name": "aten::where", "pid": 494, "tid": 494, "ts": 1742522672424057, "dur": 12, "args": {"External id": 3678, "Ev Idx": 3677}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424058, "dur": 1, "args": {"External id": 3679, "Ev Idx": 3678}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672424060, "dur": 2, "args": {"External id": 3680, "Ev Idx": 3679}}, {"ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 494, "tid": 494, "ts": 1742522672424073, "dur": 13, "args": {"External id": 3681, "Ev Idx": 3680}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424073, "dur": 1, "args": {"External id": 3682, "Ev Idx": 3681}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424074, "dur": 0, "args": {"External id": 3683, "Ev Idx": 3682}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424074, "dur": 1, "args": {"External id": 3684, "Ev Idx": 3683}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424074, "dur": 1, "args": {"External id": 3685, "Ev Idx": 3684}}, {"ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 494, "tid": 494, "ts": 1742522672424075, "dur": 11, "args": {"External id": 3686, "Ev Idx": 3685}}, {"ph": "X", "cat": "cpu_op", "name": "aten::flatten", "pid": 494, "tid": 494, "ts": 1742522672424090, "dur": 2, "args": {"External id": 3687, "Ev Idx": 3686}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424091, "dur": 1, "args": {"External id": 3688, "Ev Idx": 3687}}, {"ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 494, "tid": 494, "ts": 1742522672424148, "dur": 14, "args": {"External id": 3689, "Ev Idx": 3688}}, {"ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 494, "tid": 494, "ts": 1742522672424150, "dur": 11, "args": {"External id": 3690, "Ev Idx": 3689}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424161, "dur": 0, "args": {"External id": 3691, "Ev Idx": 3690}}, {"ph": "X", "cat": "cpu_op", "name": "aten::full_like", "pid": 494, "tid": 494, "ts": 1742522672424171, "dur": 15, "args": {"External id": 3692, "Ev Idx": 3691}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672424172, "dur": 5, "args": {"External id": 3693, "Ev Idx": 3692}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672424173, "dur": 3, "args": {"External id": 3694, "Ev Idx": 3693}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672424177, "dur": 9, "args": {"External id": 3695, "Ev Idx": 3694}}, {"ph": "X", "cat": "cpu_op", "name": "aten::full_like", "pid": 494, "tid": 494, "ts": 1742522672424190, "dur": 9, "args": {"External id": 3696, "Ev Idx": 3695}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672424190, "dur": 3, "args": {"External id": 3697, "Ev Idx": 3696}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672424191, "dur": 2, "args": {"External id": 3698, "Ev Idx": 3697}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672424194, "dur": 5, "args": {"External id": 3699, "Ev Idx": 3698}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672424204, "dur": 10, "args": {"External id": 3700, "Ev Idx": 3699}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424220, "dur": 15, "args": {"External id": 3701, "Ev Idx": 3700}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 494, "tid": 494, "ts": 1742522672424241, "dur": 13, "args": {"External id": 3702, "Ev Idx": 3701}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424257, "dur": 8, "args": {"External id": 3703, "Ev Idx": 3702}}, {"ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 494, "tid": 494, "ts": 1742522672424268, "dur": 14, "args": {"External id": 3704, "Ev Idx": 3703}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 494, "tid": 494, "ts": 1742522672424283, "dur": 8, "args": {"External id": 3705, "Ev Idx": 3704}}, {"ph": "X", "cat": "cpu_op", "name": "aten::repeat_interleave", "pid": 494, "tid": 494, "ts": 1742522672424298, "dur": 23, "args": {"External id": 3706, "Ev Idx": 3705}}, {"ph": "X", "cat": "cpu_op", "name": "aten::flatten", "pid": 494, "tid": 494, "ts": 1742522672424300, "dur": 0, "args": {"External id": 3707, "Ev Idx": 3706}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424301, "dur": 1, "args": {"External id": 3708, "Ev Idx": 3707}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424302, "dur": 0, "args": {"External id": 3709, "Ev Idx": 3708}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 494, "tid": 494, "ts": 1742522672424303, "dur": 1, "args": {"External id": 3710, "Ev Idx": 3709}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424304, "dur": 0, "args": {"External id": 3711, "Ev Idx": 3710}}, {"ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 494, "tid": 494, "ts": 1742522672424305, "dur": 14, "args": {"External id": 3712, "Ev Idx": 3711}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672424306, "dur": 4, "args": {"External id": 3713, "Ev Idx": 3712}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424306, "dur": 3, "args": {"External id": 3714, "Ev Idx": 3713}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672424310, "dur": 9, "args": {"External id": 3715, "Ev Idx": 3714}}, {"ph": "X", "cat": "cpu_op", "name": "aten::flatten", "pid": 494, "tid": 494, "ts": 1742522672424319, "dur": 1, "args": {"External id": 3716, "Ev Idx": 3715}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424320, "dur": 0, "args": {"External id": 3717, "Ev Idx": 3716}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 494, "tid": 494, "ts": 1742522672424322, "dur": 11, "args": {"External id": 3718, "Ev Idx": 3717}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424336, "dur": 1, "args": {"External id": 3719, "Ev Idx": 3718}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424336, "dur": 1, "args": {"External id": 3720, "Ev Idx": 3719}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424338, "dur": 8, "args": {"External id": 3721, "Ev Idx": 3720}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424349, "dur": 13, "args": {"External id": 3722, "Ev Idx": 3721}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424350, "dur": 2, "args": {"External id": 3723, "Ev Idx": 3722}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424353, "dur": 9, "args": {"External id": 3724, "Ev Idx": 3723}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672424353, "dur": 3, "args": {"External id": 3725, "Ev Idx": 3724}}, {"ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 494, "tid": 494, "ts": 1742522672424370, "dur": 16, "args": {"External id": 3726, "Ev Idx": 3725}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424387, "dur": 10, "args": {"External id": 3727, "Ev Idx": 3726}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424403, "dur": 1, "args": {"External id": 3728, "Ev Idx": 3727}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424404, "dur": 0, "args": {"External id": 3729, "Ev Idx": 3728}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424406, "dur": 11, "args": {"External id": 3730, "Ev Idx": 3729}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424407, "dur": 2, "args": {"External id": 3731, "Ev Idx": 3730}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424409, "dur": 8, "args": {"External id": 3732, "Ev Idx": 3731}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672424410, "dur": 2, "args": {"External id": 3733, "Ev Idx": 3732}}, {"ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 494, "tid": 494, "ts": 1742522672424419, "dur": 7, "args": {"External id": 3734, "Ev Idx": 3733}}, {"ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 494, "tid": 494, "ts": 1742522672424428, "dur": 7, "args": {"External id": 3735, "Ev Idx": 3734}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424437, "dur": 7, "args": {"External id": 3736, "Ev Idx": 3735}}, {"ph": "X", "cat": "cpu_op", "name": "aten::repeat_interleave", "pid": 494, "tid": 494, "ts": 1742522672424448, "dur": 18, "args": {"External id": 3737, "Ev Idx": 3736}}, {"ph": "X", "cat": "cpu_op", "name": "aten::flatten", "pid": 494, "tid": 494, "ts": 1742522672424449, "dur": 0, "args": {"External id": 3738, "Ev Idx": 3737}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424449, "dur": 1, "args": {"External id": 3739, "Ev Idx": 3738}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424450, "dur": 0, "args": {"External id": 3740, "Ev Idx": 3739}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 494, "tid": 494, "ts": 1742522672424451, "dur": 1, "args": {"External id": 3741, "Ev Idx": 3740}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424451, "dur": 0, "args": {"External id": 3742, "Ev Idx": 3741}}, {"ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 494, "tid": 494, "ts": 1742522672424452, "dur": 12, "args": {"External id": 3743, "Ev Idx": 3742}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672424452, "dur": 4, "args": {"External id": 3744, "Ev Idx": 3743}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424453, "dur": 2, "args": {"External id": 3745, "Ev Idx": 3744}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672424456, "dur": 8, "args": {"External id": 3746, "Ev Idx": 3745}}, {"ph": "X", "cat": "cpu_op", "name": "aten::flatten", "pid": 494, "tid": 494, "ts": 1742522672424465, "dur": 1, "args": {"External id": 3747, "Ev Idx": 3746}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424465, "dur": 0, "args": {"External id": 3748, "Ev Idx": 3747}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424467, "dur": 8, "args": {"External id": 3749, "Ev Idx": 3748}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672424478, "dur": 1, "args": {"External id": 3750, "Ev Idx": 3749}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424479, "dur": 0, "args": {"External id": 3751, "Ev Idx": 3750}}, {"ph": "X", "cat": "cpu_op", "name": "aten::floor_divide", "pid": 494, "tid": 494, "ts": 1742522672424487, "dur": 18, "args": {"External id": 3752, "Ev Idx": 3751}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424491, "dur": 2, "args": {"External id": 3753, "Ev Idx": 3752}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672424507, "dur": 11, "args": {"External id": 3754, "Ev Idx": 3753}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424511, "dur": 0, "args": {"External id": 3755, "Ev Idx": 3754}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424511, "dur": 1, "args": {"External id": 3756, "Ev Idx": 3755}}, {"ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 494, "tid": 494, "ts": 1742522672424521, "dur": 11, "args": {"External id": 3757, "Ev Idx": 3756}}, {"ph": "X", "cat": "cpu_op", "name": "aten::remainder", "pid": 494, "tid": 494, "ts": 1742522672424536, "dur": 19, "args": {"External id": 3758, "Ev Idx": 3757}}, {"ph": "X", "cat": "cpu_op", "name": "aten::remainder", "pid": 494, "tid": 494, "ts": 1742522672424539, "dur": 16, "args": {"External id": 3759, "Ev Idx": 3758}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424556, "dur": 8, "args": {"External id": 3760, "Ev Idx": 3759}}, {"ph": "X", "cat": "cpu_op", "name": "aten::pad", "pid": 494, "tid": 494, "ts": 1742522672424574, "dur": 22, "args": {"External id": 3761, "Ev Idx": 3760}}, {"ph": "X", "cat": "cpu_op", "name": "aten::constant_pad_nd", "pid": 494, "tid": 494, "ts": 1742522672424578, "dur": 17, "args": {"External id": 3762, "Ev Idx": 3761}}, {"ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 494, "tid": 494, "ts": 1742522672424580, "dur": 15, "args": {"External id": 3763, "Ev Idx": 3762}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672424580, "dur": 3, "args": {"External id": 3764, "Ev Idx": 3763}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672424584, "dur": 11, "args": {"External id": 3765, "Ev Idx": 3764}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424603, "dur": 1, "args": {"External id": 3766, "Ev Idx": 3765}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424603, "dur": 1, "args": {"External id": 3767, "Ev Idx": 3766}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672424608, "dur": 1, "args": {"External id": 3768, "Ev Idx": 3767}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424608, "dur": 1, "args": {"External id": 3769, "Ev Idx": 3768}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672424612, "dur": 0, "args": {"External id": 3770, "Ev Idx": 3769}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424612, "dur": 0, "args": {"External id": 3771, "Ev Idx": 3770}}, {"ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 494, "tid": 494, "ts": 1742522672424616, "dur": 1, "args": {"External id": 3772, "Ev Idx": 3771}}, {"ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 494, "tid": 494, "ts": 1742522672424622, "dur": 16, "args": {"External id": 3773, "Ev Idx": 3772}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424642, "dur": 1, "args": {"External id": 3774, "Ev Idx": 3773}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424643, "dur": 0, "args": {"External id": 3775, "Ev Idx": 3774}}, {"ph": "X", "cat": "cpu_op", "name": "aten::rsub", "pid": 494, "tid": 494, "ts": 1742522672424651, "dur": 15, "args": {"External id": 3776, "Ev Idx": 3775}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 494, "tid": 494, "ts": 1742522672424653, "dur": 13, "args": {"External id": 3777, "Ev Idx": 3776}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424669, "dur": 13, "args": {"External id": 3778, "Ev Idx": 3777}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424670, "dur": 3, "args": {"External id": 3779, "Ev Idx": 3778}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424673, "dur": 9, "args": {"External id": 3780, "Ev Idx": 3779}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672424674, "dur": 3, "args": {"External id": 3781, "Ev Idx": 3780}}, {"ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 494, "tid": 494, "ts": 1742522672424687, "dur": 9, "args": {"External id": 3782, "Ev Idx": 3781}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424698, "dur": 9, "args": {"External id": 3783, "Ev Idx": 3782}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424710, "dur": 1, "args": {"External id": 3784, "Ev Idx": 3783}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424710, "dur": 1, "args": {"External id": 3785, "Ev Idx": 3784}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424712, "dur": 0, "args": {"External id": 3786, "Ev Idx": 3785}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424712, "dur": 0, "args": {"External id": 3787, "Ev Idx": 3786}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424719, "dur": 11, "args": {"External id": 3788, "Ev Idx": 3787}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424719, "dur": 3, "args": {"External id": 3789, "Ev Idx": 3788}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424722, "dur": 8, "args": {"External id": 3790, "Ev Idx": 3789}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672424723, "dur": 1, "args": {"External id": 3791, "Ev Idx": 3790}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424732, "dur": 0, "args": {"External id": 3792, "Ev Idx": 3791}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424732, "dur": 0, "args": {"External id": 3793, "Ev Idx": 3792}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424733, "dur": 1, "args": {"External id": 3794, "Ev Idx": 3793}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424733, "dur": 1, "args": {"External id": 3795, "Ev Idx": 3794}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424735, "dur": 12, "args": {"External id": 3796, "Ev Idx": 3795}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424751, "dur": 11, "args": {"External id": 3797, "Ev Idx": 3796}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 494, "tid": 494, "ts": 1742522672424764, "dur": 8, "args": {"External id": 3798, "Ev Idx": 3797}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424774, "dur": 10, "args": {"External id": 3799, "Ev Idx": 3798}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424774, "dur": 2, "args": {"External id": 3800, "Ev Idx": 3799}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424777, "dur": 7, "args": {"External id": 3801, "Ev Idx": 3800}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672424777, "dur": 2, "args": {"External id": 3802, "Ev Idx": 3801}}, {"ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 494, "tid": 494, "ts": 1742522672424788, "dur": 8, "args": {"External id": 3803, "Ev Idx": 3802}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424797, "dur": 8, "args": {"External id": 3804, "Ev Idx": 3803}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424807, "dur": 1, "args": {"External id": 3805, "Ev Idx": 3804}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424808, "dur": 0, "args": {"External id": 3806, "Ev Idx": 3805}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424809, "dur": 0, "args": {"External id": 3807, "Ev Idx": 3806}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424809, "dur": 0, "args": {"External id": 3808, "Ev Idx": 3807}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424813, "dur": 4, "args": {"External id": 3809, "Ev Idx": 3808}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424814, "dur": 2, "args": {"External id": 3810, "Ev Idx": 3809}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672424816, "dur": 1, "args": {"External id": 3811, "Ev Idx": 3810}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424819, "dur": 0, "args": {"External id": 3812, "Ev Idx": 3811}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424819, "dur": 0, "args": {"External id": 3813, "Ev Idx": 3812}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424820, "dur": 0, "args": {"External id": 3814, "Ev Idx": 3813}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424820, "dur": 0, "args": {"External id": 3815, "Ev Idx": 3814}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672424822, "dur": 3, "args": {"External id": 3816, "Ev Idx": 3815}}, {"ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 494, "tid": 494, "ts": 1742522672424832, "dur": 16, "args": {"External id": 3817, "Ev Idx": 3816}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424833, "dur": 4, "args": {"External id": 3818, "Ev Idx": 3817}}, {"ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 494, "tid": 494, "ts": 1742522672424839, "dur": 8, "args": {"External id": 3819, "Ev Idx": 3818}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672424840, "dur": 7, "args": {"External id": 3820, "Ev Idx": 3819}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672424849, "dur": 1, "args": {"External id": 3821, "Ev Idx": 3820}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424850, "dur": 0, "args": {"External id": 3822, "Ev Idx": 3821}}, {"ph": "X", "cat": "cpu_op", "name": "aten::flatten", "pid": 494, "tid": 494, "ts": 1742522672424852, "dur": 0, "args": {"External id": 3823, "Ev Idx": 3822}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424852, "dur": 0, "args": {"External id": 3824, "Ev Idx": 3823}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672424854, "dur": 12, "args": {"External id": 3825, "Ev Idx": 3824}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424857, "dur": 1, "args": {"External id": 3826, "Ev Idx": 3825}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424858, "dur": 0, "args": {"External id": 3827, "Ev Idx": 3826}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672424870, "dur": 0, "args": {"External id": 3828, "Ev Idx": 3827}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424870, "dur": 0, "args": {"External id": 3829, "Ev Idx": 3828}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672424874, "dur": 10, "args": {"External id": 3830, "Ev Idx": 3829}}, {"ph": "X", "cat": "cpu_op", "name": "aten::flatten", "pid": 494, "tid": 494, "ts": 1742522672424887, "dur": 0, "args": {"External id": 3831, "Ev Idx": 3830}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424887, "dur": 0, "args": {"External id": 3832, "Ev Idx": 3831}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424889, "dur": 1, "args": {"External id": 3833, "Ev Idx": 3832}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424890, "dur": 0, "args": {"External id": 3834, "Ev Idx": 3833}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424890, "dur": 1, "args": {"External id": 3835, "Ev Idx": 3834}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424891, "dur": 0, "args": {"External id": 3836, "Ev Idx": 3835}}, {"ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 494, "tid": 494, "ts": 1742522672424895, "dur": 26, "args": {"External id": 3837, "Ev Idx": 3836}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 494, "tid": 494, "ts": 1742522672424897, "dur": 1, "args": {"External id": 3838, "Ev Idx": 3837}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424898, "dur": 0, "args": {"External id": 3839, "Ev Idx": 3838}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424899, "dur": 3, "args": {"External id": 3840, "Ev Idx": 3839}}, {"ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 494, "tid": 494, "ts": 1742522672424903, "dur": 1, "args": {"External id": 3841, "Ev Idx": 3840}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 494, "tid": 494, "ts": 1742522672424905, "dur": 3, "args": {"External id": 3842, "Ev Idx": 3841}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424907, "dur": 1, "args": {"External id": 3843, "Ev Idx": 3842}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 494, "tid": 494, "ts": 1742522672424908, "dur": 2, "args": {"External id": 3844, "Ev Idx": 3843}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424909, "dur": 0, "args": {"External id": 3845, "Ev Idx": 3844}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 494, "tid": 494, "ts": 1742522672424911, "dur": 1, "args": {"External id": 3846, "Ev Idx": 3845}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 494, "tid": 494, "ts": 1742522672424911, "dur": 1, "args": {"External id": 3847, "Ev Idx": 3846}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424911, "dur": 1, "args": {"External id": 3848, "Ev Idx": 3847}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672424912, "dur": 9, "args": {"External id": 3849, "Ev Idx": 3848}}, {"ph": "X", "cat": "cpu_op", "name": "aten::flatten", "pid": 494, "tid": 494, "ts": 1742522672424923, "dur": 0, "args": {"External id": 3850, "Ev Idx": 3849}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424923, "dur": 0, "args": {"External id": 3851, "Ev Idx": 3850}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672424925, "dur": 1, "args": {"External id": 3852, "Ev Idx": 3851}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424925, "dur": 0, "args": {"External id": 3853, "Ev Idx": 3852}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672424926, "dur": 1, "args": {"External id": 3854, "Ev Idx": 3853}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424926, "dur": 0, "args": {"External id": 3855, "Ev Idx": 3854}}, {"ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 494, "tid": 494, "ts": 1742522672424928, "dur": 8, "args": {"External id": 3856, "Ev Idx": 3855}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 494, "tid": 494, "ts": 1742522672424929, "dur": 0, "args": {"External id": 3857, "Ev Idx": 3856}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424929, "dur": 0, "args": {"External id": 3858, "Ev Idx": 3857}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672424930, "dur": 2, "args": {"External id": 3859, "Ev Idx": 3858}}, {"ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 494, "tid": 494, "ts": 1742522672424932, "dur": 0, "args": {"External id": 3860, "Ev Idx": 3859}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 494, "tid": 494, "ts": 1742522672424933, "dur": 0, "args": {"External id": 3861, "Ev Idx": 3860}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424933, "dur": 0, "args": {"External id": 3862, "Ev Idx": 3861}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 494, "tid": 494, "ts": 1742522672424934, "dur": 0, "args": {"External id": 3863, "Ev Idx": 3862}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424934, "dur": 0, "args": {"External id": 3864, "Ev Idx": 3863}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 494, "tid": 494, "ts": 1742522672424935, "dur": 0, "args": {"External id": 3865, "Ev Idx": 3864}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 494, "tid": 494, "ts": 1742522672424935, "dur": 0, "args": {"External id": 3866, "Ev Idx": 3865}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424935, "dur": 0, "args": {"External id": 3867, "Ev Idx": 3866}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672424936, "dur": 0, "args": {"External id": 3868, "Ev Idx": 3867}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672424938, "dur": 1, "args": {"External id": 3869, "Ev Idx": 3868}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424938, "dur": 1, "args": {"External id": 3870, "Ev Idx": 3869}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672424940, "dur": 14, "args": {"External id": 3871, "Ev Idx": 3870}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424944, "dur": 0, "args": {"External id": 3872, "Ev Idx": 3871}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424944, "dur": 0, "args": {"External id": 3873, "Ev Idx": 3872}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672424958, "dur": 1, "args": {"External id": 3874, "Ev Idx": 3873}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424959, "dur": 0, "args": {"External id": 3875, "Ev Idx": 3874}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672424963, "dur": 10, "args": {"External id": 3876, "Ev Idx": 3875}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672424976, "dur": 0, "args": {"External id": 3877, "Ev Idx": 3876}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424976, "dur": 0, "args": {"External id": 3878, "Ev Idx": 3877}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672424977, "dur": 3, "args": {"External id": 3879, "Ev Idx": 3878}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672424983, "dur": 0, "args": {"External id": 3880, "Ev Idx": 3879}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672424983, "dur": 0, "args": {"External id": 3881, "Ev Idx": 3880}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672424990, "dur": 2, "args": {"External id": 3882, "Ev Idx": 3881}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672424991, "dur": 1, "args": {"External id": 3883, "Ev Idx": 3882}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672424994, "dur": 4, "args": {"External id": 3884, "Ev Idx": 3883}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672424995, "dur": 2, "args": {"External id": 3885, "Ev Idx": 3884}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672424998, "dur": 3, "args": {"External id": 3886, "Ev Idx": 3885}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672424998, "dur": 2, "args": {"External id": 3887, "Ev Idx": 3886}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425001, "dur": 2, "args": {"External id": 3888, "Ev Idx": 3887}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425001, "dur": 2, "args": {"External id": 3889, "Ev Idx": 3888}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425004, "dur": 2, "args": {"External id": 3890, "Ev Idx": 3889}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425004, "dur": 2, "args": {"External id": 3891, "Ev Idx": 3890}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425007, "dur": 2, "args": {"External id": 3892, "Ev Idx": 3891}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425007, "dur": 2, "args": {"External id": 3893, "Ev Idx": 3892}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425010, "dur": 2, "args": {"External id": 3894, "Ev Idx": 3893}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425010, "dur": 2, "args": {"External id": 3895, "Ev Idx": 3894}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425013, "dur": 2, "args": {"External id": 3896, "Ev Idx": 3895}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425013, "dur": 2, "args": {"External id": 3897, "Ev Idx": 3896}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425015, "dur": 3, "args": {"External id": 3898, "Ev Idx": 3897}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425015, "dur": 3, "args": {"External id": 3899, "Ev Idx": 3898}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425018, "dur": 3, "args": {"External id": 3900, "Ev Idx": 3899}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425018, "dur": 3, "args": {"External id": 3901, "Ev Idx": 3900}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425022, "dur": 2, "args": {"External id": 3902, "Ev Idx": 3901}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425022, "dur": 2, "args": {"External id": 3903, "Ev Idx": 3902}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425024, "dur": 3, "args": {"External id": 3904, "Ev Idx": 3903}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425025, "dur": 2, "args": {"External id": 3905, "Ev Idx": 3904}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425027, "dur": 2, "args": {"External id": 3906, "Ev Idx": 3905}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425027, "dur": 2, "args": {"External id": 3907, "Ev Idx": 3906}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425030, "dur": 2, "args": {"External id": 3908, "Ev Idx": 3907}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425030, "dur": 2, "args": {"External id": 3909, "Ev Idx": 3908}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425033, "dur": 2, "args": {"External id": 3910, "Ev Idx": 3909}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425033, "dur": 2, "args": {"External id": 3911, "Ev Idx": 3910}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425035, "dur": 2, "args": {"External id": 3912, "Ev Idx": 3911}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425035, "dur": 2, "args": {"External id": 3913, "Ev Idx": 3912}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425038, "dur": 2, "args": {"External id": 3914, "Ev Idx": 3913}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425038, "dur": 2, "args": {"External id": 3915, "Ev Idx": 3914}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425040, "dur": 3, "args": {"External id": 3916, "Ev Idx": 3915}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425041, "dur": 2, "args": {"External id": 3917, "Ev Idx": 3916}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425043, "dur": 3, "args": {"External id": 3918, "Ev Idx": 3917}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425044, "dur": 2, "args": {"External id": 3919, "Ev Idx": 3918}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425046, "dur": 3, "args": {"External id": 3920, "Ev Idx": 3919}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425047, "dur": 2, "args": {"External id": 3921, "Ev Idx": 3920}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425049, "dur": 3, "args": {"External id": 3922, "Ev Idx": 3921}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425050, "dur": 1, "args": {"External id": 3923, "Ev Idx": 3922}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425052, "dur": 2, "args": {"External id": 3924, "Ev Idx": 3923}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425052, "dur": 2, "args": {"External id": 3925, "Ev Idx": 3924}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425055, "dur": 2, "args": {"External id": 3926, "Ev Idx": 3925}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425055, "dur": 2, "args": {"External id": 3927, "Ev Idx": 3926}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425057, "dur": 2, "args": {"External id": 3928, "Ev Idx": 3927}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425057, "dur": 2, "args": {"External id": 3929, "Ev Idx": 3928}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425060, "dur": 2, "args": {"External id": 3930, "Ev Idx": 3929}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425060, "dur": 2, "args": {"External id": 3931, "Ev Idx": 3930}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425062, "dur": 3, "args": {"External id": 3932, "Ev Idx": 3931}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425063, "dur": 2, "args": {"External id": 3933, "Ev Idx": 3932}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425065, "dur": 2, "args": {"External id": 3934, "Ev Idx": 3933}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425065, "dur": 2, "args": {"External id": 3935, "Ev Idx": 3934}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425068, "dur": 2, "args": {"External id": 3936, "Ev Idx": 3935}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425068, "dur": 2, "args": {"External id": 3937, "Ev Idx": 3936}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425070, "dur": 3, "args": {"External id": 3938, "Ev Idx": 3937}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425071, "dur": 2, "args": {"External id": 3939, "Ev Idx": 3938}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425073, "dur": 2, "args": {"External id": 3940, "Ev Idx": 3939}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425073, "dur": 2, "args": {"External id": 3941, "Ev Idx": 3940}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425076, "dur": 2, "args": {"External id": 3942, "Ev Idx": 3941}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425076, "dur": 2, "args": {"External id": 3943, "Ev Idx": 3942}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425079, "dur": 2, "args": {"External id": 3944, "Ev Idx": 3943}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425079, "dur": 2, "args": {"External id": 3945, "Ev Idx": 3944}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425081, "dur": 3, "args": {"External id": 3946, "Ev Idx": 3945}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425081, "dur": 2, "args": {"External id": 3947, "Ev Idx": 3946}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425084, "dur": 2, "args": {"External id": 3948, "Ev Idx": 3947}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425084, "dur": 2, "args": {"External id": 3949, "Ev Idx": 3948}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425086, "dur": 3, "args": {"External id": 3950, "Ev Idx": 3949}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425087, "dur": 2, "args": {"External id": 3951, "Ev Idx": 3950}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425089, "dur": 3, "args": {"External id": 3952, "Ev Idx": 3951}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425089, "dur": 3, "args": {"External id": 3953, "Ev Idx": 3952}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425092, "dur": 3, "args": {"External id": 3954, "Ev Idx": 3953}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425093, "dur": 2, "args": {"External id": 3955, "Ev Idx": 3954}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425095, "dur": 3, "args": {"External id": 3956, "Ev Idx": 3955}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425095, "dur": 3, "args": {"External id": 3957, "Ev Idx": 3956}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425098, "dur": 2, "args": {"External id": 3958, "Ev Idx": 3957}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425098, "dur": 2, "args": {"External id": 3959, "Ev Idx": 3958}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425101, "dur": 2, "args": {"External id": 3960, "Ev Idx": 3959}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425101, "dur": 2, "args": {"External id": 3961, "Ev Idx": 3960}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425103, "dur": 3, "args": {"External id": 3962, "Ev Idx": 3961}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425103, "dur": 2, "args": {"External id": 3963, "Ev Idx": 3962}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425106, "dur": 2, "args": {"External id": 3964, "Ev Idx": 3963}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425106, "dur": 2, "args": {"External id": 3965, "Ev Idx": 3964}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425109, "dur": 2, "args": {"External id": 3966, "Ev Idx": 3965}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425109, "dur": 2, "args": {"External id": 3967, "Ev Idx": 3966}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425112, "dur": 2, "args": {"External id": 3968, "Ev Idx": 3967}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425112, "dur": 2, "args": {"External id": 3969, "Ev Idx": 3968}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425115, "dur": 2, "args": {"External id": 3970, "Ev Idx": 3969}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425115, "dur": 2, "args": {"External id": 3971, "Ev Idx": 3970}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425117, "dur": 3, "args": {"External id": 3972, "Ev Idx": 3971}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425118, "dur": 2, "args": {"External id": 3973, "Ev Idx": 3972}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425120, "dur": 3, "args": {"External id": 3974, "Ev Idx": 3973}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425120, "dur": 3, "args": {"External id": 3975, "Ev Idx": 3974}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425123, "dur": 3, "args": {"External id": 3976, "Ev Idx": 3975}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425123, "dur": 2, "args": {"External id": 3977, "Ev Idx": 3976}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425126, "dur": 2, "args": {"External id": 3978, "Ev Idx": 3977}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425126, "dur": 2, "args": {"External id": 3979, "Ev Idx": 3978}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425128, "dur": 3, "args": {"External id": 3980, "Ev Idx": 3979}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425129, "dur": 2, "args": {"External id": 3981, "Ev Idx": 3980}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425131, "dur": 2, "args": {"External id": 3982, "Ev Idx": 3981}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425131, "dur": 2, "args": {"External id": 3983, "Ev Idx": 3982}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425134, "dur": 2, "args": {"External id": 3984, "Ev Idx": 3983}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425134, "dur": 2, "args": {"External id": 3985, "Ev Idx": 3984}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425136, "dur": 3, "args": {"External id": 3986, "Ev Idx": 3985}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425136, "dur": 2, "args": {"External id": 3987, "Ev Idx": 3986}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425139, "dur": 2, "args": {"External id": 3988, "Ev Idx": 3987}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425139, "dur": 2, "args": {"External id": 3989, "Ev Idx": 3988}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425141, "dur": 3, "args": {"External id": 3990, "Ev Idx": 3989}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425142, "dur": 2, "args": {"External id": 3991, "Ev Idx": 3990}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425144, "dur": 3, "args": {"External id": 3992, "Ev Idx": 3991}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425145, "dur": 2, "args": {"External id": 3993, "Ev Idx": 3992}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425147, "dur": 3, "args": {"External id": 3994, "Ev Idx": 3993}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425148, "dur": 2, "args": {"External id": 3995, "Ev Idx": 3994}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425150, "dur": 3, "args": {"External id": 3996, "Ev Idx": 3995}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425150, "dur": 2, "args": {"External id": 3997, "Ev Idx": 3996}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425153, "dur": 2, "args": {"External id": 3998, "Ev Idx": 3997}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425153, "dur": 2, "args": {"External id": 3999, "Ev Idx": 3998}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425156, "dur": 2, "args": {"External id": 4000, "Ev Idx": 3999}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425156, "dur": 2, "args": {"External id": 4001, "Ev Idx": 4000}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425159, "dur": 2, "args": {"External id": 4002, "Ev Idx": 4001}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425159, "dur": 2, "args": {"External id": 4003, "Ev Idx": 4002}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425161, "dur": 3, "args": {"External id": 4004, "Ev Idx": 4003}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425161, "dur": 2, "args": {"External id": 4005, "Ev Idx": 4004}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425164, "dur": 2, "args": {"External id": 4006, "Ev Idx": 4005}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425164, "dur": 2, "args": {"External id": 4007, "Ev Idx": 4006}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425166, "dur": 3, "args": {"External id": 4008, "Ev Idx": 4007}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425167, "dur": 2, "args": {"External id": 4009, "Ev Idx": 4008}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425169, "dur": 3, "args": {"External id": 4010, "Ev Idx": 4009}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425169, "dur": 2, "args": {"External id": 4011, "Ev Idx": 4010}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425172, "dur": 2, "args": {"External id": 4012, "Ev Idx": 4011}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425172, "dur": 2, "args": {"External id": 4013, "Ev Idx": 4012}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425175, "dur": 2, "args": {"External id": 4014, "Ev Idx": 4013}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425175, "dur": 2, "args": {"External id": 4015, "Ev Idx": 4014}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425177, "dur": 2, "args": {"External id": 4016, "Ev Idx": 4015}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425177, "dur": 2, "args": {"External id": 4017, "Ev Idx": 4016}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425180, "dur": 2, "args": {"External id": 4018, "Ev Idx": 4017}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425180, "dur": 2, "args": {"External id": 4019, "Ev Idx": 4018}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425182, "dur": 3, "args": {"External id": 4020, "Ev Idx": 4019}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425183, "dur": 2, "args": {"External id": 4021, "Ev Idx": 4020}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425185, "dur": 2, "args": {"External id": 4022, "Ev Idx": 4021}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425185, "dur": 2, "args": {"External id": 4023, "Ev Idx": 4022}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425188, "dur": 2, "args": {"External id": 4024, "Ev Idx": 4023}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425188, "dur": 2, "args": {"External id": 4025, "Ev Idx": 4024}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425190, "dur": 3, "args": {"External id": 4026, "Ev Idx": 4025}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425190, "dur": 2, "args": {"External id": 4027, "Ev Idx": 4026}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425193, "dur": 2, "args": {"External id": 4028, "Ev Idx": 4027}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425193, "dur": 2, "args": {"External id": 4029, "Ev Idx": 4028}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425196, "dur": 2, "args": {"External id": 4030, "Ev Idx": 4029}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425196, "dur": 2, "args": {"External id": 4031, "Ev Idx": 4030}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425198, "dur": 3, "args": {"External id": 4032, "Ev Idx": 4031}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425198, "dur": 3, "args": {"External id": 4033, "Ev Idx": 4032}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425201, "dur": 2, "args": {"External id": 4034, "Ev Idx": 4033}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425201, "dur": 2, "args": {"External id": 4035, "Ev Idx": 4034}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425204, "dur": 2, "args": {"External id": 4036, "Ev Idx": 4035}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425204, "dur": 2, "args": {"External id": 4037, "Ev Idx": 4036}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425206, "dur": 3, "args": {"External id": 4038, "Ev Idx": 4037}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425206, "dur": 2, "args": {"External id": 4039, "Ev Idx": 4038}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425209, "dur": 2, "args": {"External id": 4040, "Ev Idx": 4039}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425209, "dur": 2, "args": {"External id": 4041, "Ev Idx": 4040}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425212, "dur": 2, "args": {"External id": 4042, "Ev Idx": 4041}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425212, "dur": 2, "args": {"External id": 4043, "Ev Idx": 4042}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425214, "dur": 2, "args": {"External id": 4044, "Ev Idx": 4043}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425214, "dur": 2, "args": {"External id": 4045, "Ev Idx": 4044}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425217, "dur": 2, "args": {"External id": 4046, "Ev Idx": 4045}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425217, "dur": 2, "args": {"External id": 4047, "Ev Idx": 4046}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425220, "dur": 2, "args": {"External id": 4048, "Ev Idx": 4047}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425220, "dur": 2, "args": {"External id": 4049, "Ev Idx": 4048}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425223, "dur": 2, "args": {"External id": 4050, "Ev Idx": 4049}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425223, "dur": 2, "args": {"External id": 4051, "Ev Idx": 4050}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425225, "dur": 3, "args": {"External id": 4052, "Ev Idx": 4051}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425225, "dur": 2, "args": {"External id": 4053, "Ev Idx": 4052}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425228, "dur": 2, "args": {"External id": 4054, "Ev Idx": 4053}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425228, "dur": 2, "args": {"External id": 4055, "Ev Idx": 4054}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425231, "dur": 2, "args": {"External id": 4056, "Ev Idx": 4055}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425231, "dur": 2, "args": {"External id": 4057, "Ev Idx": 4056}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425233, "dur": 3, "args": {"External id": 4058, "Ev Idx": 4057}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425234, "dur": 1, "args": {"External id": 4059, "Ev Idx": 4058}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425236, "dur": 2, "args": {"External id": 4060, "Ev Idx": 4059}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425236, "dur": 2, "args": {"External id": 4061, "Ev Idx": 4060}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425239, "dur": 2, "args": {"External id": 4062, "Ev Idx": 4061}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425239, "dur": 2, "args": {"External id": 4063, "Ev Idx": 4062}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425241, "dur": 3, "args": {"External id": 4064, "Ev Idx": 4063}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425242, "dur": 2, "args": {"External id": 4065, "Ev Idx": 4064}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425244, "dur": 3, "args": {"External id": 4066, "Ev Idx": 4065}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425244, "dur": 2, "args": {"External id": 4067, "Ev Idx": 4066}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425247, "dur": 2, "args": {"External id": 4068, "Ev Idx": 4067}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425247, "dur": 2, "args": {"External id": 4069, "Ev Idx": 4068}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425249, "dur": 3, "args": {"External id": 4070, "Ev Idx": 4069}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425250, "dur": 2, "args": {"External id": 4071, "Ev Idx": 4070}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425252, "dur": 2, "args": {"External id": 4072, "Ev Idx": 4071}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425252, "dur": 2, "args": {"External id": 4073, "Ev Idx": 4072}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425255, "dur": 2, "args": {"External id": 4074, "Ev Idx": 4073}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425255, "dur": 2, "args": {"External id": 4075, "Ev Idx": 4074}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425257, "dur": 3, "args": {"External id": 4076, "Ev Idx": 4075}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425257, "dur": 2, "args": {"External id": 4077, "Ev Idx": 4076}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425260, "dur": 2, "args": {"External id": 4078, "Ev Idx": 4077}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425260, "dur": 2, "args": {"External id": 4079, "Ev Idx": 4078}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425263, "dur": 2, "args": {"External id": 4080, "Ev Idx": 4079}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425263, "dur": 2, "args": {"External id": 4081, "Ev Idx": 4080}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425265, "dur": 3, "args": {"External id": 4082, "Ev Idx": 4081}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425265, "dur": 2, "args": {"External id": 4083, "Ev Idx": 4082}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425268, "dur": 2, "args": {"External id": 4084, "Ev Idx": 4083}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425268, "dur": 2, "args": {"External id": 4085, "Ev Idx": 4084}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425271, "dur": 2, "args": {"External id": 4086, "Ev Idx": 4085}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425271, "dur": 2, "args": {"External id": 4087, "Ev Idx": 4086}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425273, "dur": 3, "args": {"External id": 4088, "Ev Idx": 4087}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425274, "dur": 1, "args": {"External id": 4089, "Ev Idx": 4088}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425276, "dur": 2, "args": {"External id": 4090, "Ev Idx": 4089}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425276, "dur": 2, "args": {"External id": 4091, "Ev Idx": 4090}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425279, "dur": 2, "args": {"External id": 4092, "Ev Idx": 4091}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425279, "dur": 2, "args": {"External id": 4093, "Ev Idx": 4092}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425281, "dur": 2, "args": {"External id": 4094, "Ev Idx": 4093}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425281, "dur": 2, "args": {"External id": 4095, "Ev Idx": 4094}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425284, "dur": 5, "args": {"External id": 4096, "Ev Idx": 4095}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425287, "dur": 2, "args": {"External id": 4097, "Ev Idx": 4096}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425289, "dur": 3, "args": {"External id": 4098, "Ev Idx": 4097}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425290, "dur": 2, "args": {"External id": 4099, "Ev Idx": 4098}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425292, "dur": 2, "args": {"External id": 4100, "Ev Idx": 4099}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425292, "dur": 2, "args": {"External id": 4101, "Ev Idx": 4100}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425295, "dur": 2, "args": {"External id": 4102, "Ev Idx": 4101}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425295, "dur": 2, "args": {"External id": 4103, "Ev Idx": 4102}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425297, "dur": 3, "args": {"External id": 4104, "Ev Idx": 4103}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425298, "dur": 2, "args": {"External id": 4105, "Ev Idx": 4104}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425300, "dur": 3, "args": {"External id": 4106, "Ev Idx": 4105}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425301, "dur": 2, "args": {"External id": 4107, "Ev Idx": 4106}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425303, "dur": 3, "args": {"External id": 4108, "Ev Idx": 4107}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425304, "dur": 2, "args": {"External id": 4109, "Ev Idx": 4108}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425306, "dur": 3, "args": {"External id": 4110, "Ev Idx": 4109}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425307, "dur": 2, "args": {"External id": 4111, "Ev Idx": 4110}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425309, "dur": 2, "args": {"External id": 4112, "Ev Idx": 4111}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425309, "dur": 2, "args": {"External id": 4113, "Ev Idx": 4112}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425312, "dur": 2, "args": {"External id": 4114, "Ev Idx": 4113}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425312, "dur": 2, "args": {"External id": 4115, "Ev Idx": 4114}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425315, "dur": 2, "args": {"External id": 4116, "Ev Idx": 4115}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425315, "dur": 2, "args": {"External id": 4117, "Ev Idx": 4116}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425317, "dur": 3, "args": {"External id": 4118, "Ev Idx": 4117}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425318, "dur": 1, "args": {"External id": 4119, "Ev Idx": 4118}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425320, "dur": 2, "args": {"External id": 4120, "Ev Idx": 4119}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425320, "dur": 2, "args": {"External id": 4121, "Ev Idx": 4120}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425323, "dur": 2, "args": {"External id": 4122, "Ev Idx": 4121}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425323, "dur": 2, "args": {"External id": 4123, "Ev Idx": 4122}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425325, "dur": 3, "args": {"External id": 4124, "Ev Idx": 4123}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425326, "dur": 2, "args": {"External id": 4125, "Ev Idx": 4124}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425328, "dur": 2, "args": {"External id": 4126, "Ev Idx": 4125}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425328, "dur": 2, "args": {"External id": 4127, "Ev Idx": 4126}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425331, "dur": 2, "args": {"External id": 4128, "Ev Idx": 4127}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425331, "dur": 2, "args": {"External id": 4129, "Ev Idx": 4128}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425333, "dur": 2, "args": {"External id": 4130, "Ev Idx": 4129}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425333, "dur": 2, "args": {"External id": 4131, "Ev Idx": 4130}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425336, "dur": 2, "args": {"External id": 4132, "Ev Idx": 4131}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425336, "dur": 2, "args": {"External id": 4133, "Ev Idx": 4132}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425338, "dur": 3, "args": {"External id": 4134, "Ev Idx": 4133}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425339, "dur": 2, "args": {"External id": 4135, "Ev Idx": 4134}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425341, "dur": 2, "args": {"External id": 4136, "Ev Idx": 4135}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425341, "dur": 2, "args": {"External id": 4137, "Ev Idx": 4136}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425344, "dur": 2, "args": {"External id": 4138, "Ev Idx": 4137}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425344, "dur": 2, "args": {"External id": 4139, "Ev Idx": 4138}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425376, "dur": 3, "args": {"External id": 4140, "Ev Idx": 4139}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425376, "dur": 3, "args": {"External id": 4141, "Ev Idx": 4140}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425381, "dur": 3, "args": {"External id": 4142, "Ev Idx": 4141}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425381, "dur": 3, "args": {"External id": 4143, "Ev Idx": 4142}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425385, "dur": 3, "args": {"External id": 4144, "Ev Idx": 4143}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425385, "dur": 3, "args": {"External id": 4145, "Ev Idx": 4144}}, {"ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 494, "tid": 494, "ts": 1742522672425389, "dur": 16, "args": {"External id": 4146, "Ev Idx": 4145}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425390, "dur": 2, "args": {"External id": 4147, "Ev Idx": 4146}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672425393, "dur": 11, "args": {"External id": 4148, "Ev Idx": 4147}}, {"ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 494, "tid": 494, "ts": 1742522672425406, "dur": 1, "args": {"External id": 4149, "Ev Idx": 4148}}, {"ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 494, "tid": 494, "ts": 1742522672425412, "dur": 15, "args": {"External id": 4150, "Ev Idx": 4149}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672425412, "dur": 4, "args": {"External id": 4151, "Ev Idx": 4150}}, {"ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 494, "tid": 494, "ts": 1742522672425417, "dur": 10, "args": {"External id": 4152, "Ev Idx": 4151}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672425418, "dur": 8, "args": {"External id": 4153, "Ev Idx": 4152}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672425430, "dur": 1, "args": {"External id": 4154, "Ev Idx": 4153}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672425431, "dur": 0, "args": {"External id": 4155, "Ev Idx": 4154}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672425434, "dur": 7, "args": {"External id": 4156, "Ev Idx": 4155}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672425450, "dur": 1, "args": {"External id": 4157, "Ev Idx": 4156}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672425450, "dur": 0, "args": {"External id": 4158, "Ev Idx": 4157}}, {"ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 494, "tid": 494, "ts": 1742522672425452, "dur": 10, "args": {"External id": 4159, "Ev Idx": 4158}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425452, "dur": 3, "args": {"External id": 4160, "Ev Idx": 4159}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672425455, "dur": 7, "args": {"External id": 4161, "Ev Idx": 4160}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672425465, "dur": 0, "args": {"External id": 4162, "Ev Idx": 4161}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672425465, "dur": 0, "args": {"External id": 4163, "Ev Idx": 4162}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672425466, "dur": 5, "args": {"External id": 4164, "Ev Idx": 4163}}, {"ph": "X", "cat": "cpu_op", "name": "aten::index_put_", "pid": 494, "tid": 494, "ts": 1742522672425479, "dur": 23, "args": {"External id": 4165, "Ev Idx": 4164}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_index_put_impl_", "pid": 494, "tid": 494, "ts": 1742522672425481, "dur": 20, "args": {"External id": 4166, "Ev Idx": 4165}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672425487, "dur": 1, "args": {"External id": 4167, "Ev Idx": 4166}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672425489, "dur": 1, "args": {"External id": 4168, "Ev Idx": 4167}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672425489, "dur": 1, "args": {"External id": 4169, "Ev Idx": 4168}}, {"ph": "X", "cat": "cpu_op", "name": "aten::embedding", "pid": 494, "tid": 494, "ts": 1742522672425532, "dur": 21, "args": {"External id": 4170, "Ev Idx": 4169}}, {"ph": "X", "cat": "cpu_op", "name": "aten::index_select", "pid": 494, "tid": 494, "ts": 1742522672425535, "dur": 18, "args": {"External id": 4171, "Ev Idx": 4170}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672425536, "dur": 3, "args": {"External id": 4172, "Ev Idx": 4171}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672425541, "dur": 3, "args": {"External id": 4173, "Ev Idx": 4172}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672425573, "dur": 0, "args": {"External id": 4174, "Ev Idx": 4173}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425576, "dur": 4, "args": {"External id": 4175, "Ev Idx": 4174}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425577, "dur": 3, "args": {"External id": 4176, "Ev Idx": 4175}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672425648, "dur": 1, "args": {"External id": 4177, "Ev Idx": 4176}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672425660, "dur": 0, "args": {"External id": 4178, "Ev Idx": 4177}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425662, "dur": 4, "args": {"External id": 4179, "Ev Idx": 4178}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425663, "dur": 3, "args": {"External id": 4180, "Ev Idx": 4179}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672425712, "dur": 0, "args": {"External id": 4181, "Ev Idx": 4180}}, {"ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 494, "tid": 494, "ts": 1742522672425717, "dur": 15, "args": {"External id": 4182, "Ev Idx": 4181}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672425752, "dur": 3, "args": {"External id": 4183, "Ev Idx": 4182}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672425757, "dur": 0, "args": {"External id": 4184, "Ev Idx": 4183}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672425758, "dur": 0, "args": {"External id": 4185, "Ev Idx": 4184}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672425824, "dur": 0, "args": {"External id": 4186, "Ev Idx": 4185}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672425828, "dur": 5, "args": {"External id": 4187, "Ev Idx": 4186}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672425828, "dur": 5, "args": {"External id": 4188, "Ev Idx": 4187}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672425836, "dur": 4, "args": {"External id": 4189, "Ev Idx": 4188}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672425844, "dur": 0, "args": {"External id": 4190, "Ev Idx": 4189}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672425849, "dur": 2, "args": {"External id": 4191, "Ev Idx": 4190}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672425850, "dur": 1, "args": {"External id": 4192, "Ev Idx": 4191}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672425911, "dur": 0, "args": {"External id": 4193, "Ev Idx": 4192}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672425946, "dur": 5, "args": {"External id": 4194, "Ev Idx": 4193}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426098, "dur": 2, "args": {"External id": 4195, "Ev Idx": 4194}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426099, "dur": 1, "args": {"External id": 4196, "Ev Idx": 4195}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426101, "dur": 1, "args": {"External id": 4197, "Ev Idx": 4196}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426101, "dur": 0, "args": {"External id": 4198, "Ev Idx": 4197}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426111, "dur": 1, "args": {"External id": 4199, "Ev Idx": 4198}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426114, "dur": 1, "args": {"External id": 4200, "Ev Idx": 4199}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426167, "dur": 1, "args": {"External id": 4201, "Ev Idx": 4200}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426172, "dur": 1, "args": {"External id": 4202, "Ev Idx": 4201}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426173, "dur": 0, "args": {"External id": 4203, "Ev Idx": 4202}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426174, "dur": 0, "args": {"External id": 4204, "Ev Idx": 4203}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426174, "dur": 0, "args": {"External id": 4205, "Ev Idx": 4204}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426177, "dur": 0, "args": {"External id": 4206, "Ev Idx": 4205}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426177, "dur": 0, "args": {"External id": 4207, "Ev Idx": 4206}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426178, "dur": 0, "args": {"External id": 4208, "Ev Idx": 4207}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426178, "dur": 0, "args": {"External id": 4209, "Ev Idx": 4208}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672426203, "dur": 6, "args": {"External id": 4210, "Ev Idx": 4209}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672426204, "dur": 5, "args": {"External id": 4211, "Ev Idx": 4210}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426211, "dur": 4, "args": {"External id": 4212, "Ev Idx": 4211}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426216, "dur": 0, "args": {"External id": 4213, "Ev Idx": 4212}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426234, "dur": 3, "args": {"External id": 4214, "Ev Idx": 4213}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426333, "dur": 1, "args": {"External id": 4215, "Ev Idx": 4214}}, {"ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 494, "tid": 494, "ts": 1742522672426346, "dur": 4, "args": {"External id": 4216, "Ev Idx": 4215}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426348, "dur": 1, "args": {"External id": 4217, "Ev Idx": 4216}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426349, "dur": 1, "args": {"External id": 4218, "Ev Idx": 4217}}, {"ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 494, "tid": 494, "ts": 1742522672426353, "dur": 2, "args": {"External id": 4219, "Ev Idx": 4218}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426354, "dur": 0, "args": {"External id": 4220, "Ev Idx": 4219}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426355, "dur": 0, "args": {"External id": 4221, "Ev Idx": 4220}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672426357, "dur": 1, "args": {"External id": 4222, "Ev Idx": 4221}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426357, "dur": 0, "args": {"External id": 4223, "Ev Idx": 4222}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426362, "dur": 4, "args": {"External id": 4224, "Ev Idx": 4223}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426371, "dur": 1, "args": {"External id": 4225, "Ev Idx": 4224}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426371, "dur": 0, "args": {"External id": 4226, "Ev Idx": 4225}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426372, "dur": 1, "args": {"External id": 4227, "Ev Idx": 4226}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426372, "dur": 1, "args": {"External id": 4228, "Ev Idx": 4227}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426373, "dur": 1, "args": {"External id": 4229, "Ev Idx": 4228}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426374, "dur": 0, "args": {"External id": 4230, "Ev Idx": 4229}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426415, "dur": 1, "args": {"External id": 4231, "Ev Idx": 4230}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426415, "dur": 1, "args": {"External id": 4232, "Ev Idx": 4231}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426416, "dur": 1, "args": {"External id": 4233, "Ev Idx": 4232}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426417, "dur": 0, "args": {"External id": 4234, "Ev Idx": 4233}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426417, "dur": 1, "args": {"External id": 4235, "Ev Idx": 4234}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426418, "dur": 0, "args": {"External id": 4236, "Ev Idx": 4235}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unflatten", "pid": 494, "tid": 494, "ts": 1742522672426464, "dur": 5, "args": {"External id": 4237, "Ev Idx": 4236}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426468, "dur": 1, "args": {"External id": 4238, "Ev Idx": 4237}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426481, "dur": 4, "args": {"External id": 4239, "Ev Idx": 4238}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426487, "dur": 3, "args": {"External id": 4240, "Ev Idx": 4239}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426509, "dur": 1, "args": {"External id": 4241, "Ev Idx": 4240}}, {"ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 494, "tid": 494, "ts": 1742522672426511, "dur": 2, "args": {"External id": 4242, "Ev Idx": 4241}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426513, "dur": 0, "args": {"External id": 4243, "Ev Idx": 4242}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672426514, "dur": 1, "args": {"External id": 4244, "Ev Idx": 4243}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426514, "dur": 1, "args": {"External id": 4245, "Ev Idx": 4244}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426516, "dur": 3, "args": {"External id": 4246, "Ev Idx": 4245}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426520, "dur": 3, "args": {"External id": 4247, "Ev Idx": 4246}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426524, "dur": 2, "args": {"External id": 4248, "Ev Idx": 4247}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426527, "dur": 3, "args": {"External id": 4249, "Ev Idx": 4248}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426541, "dur": 0, "args": {"External id": 4250, "Ev Idx": 4249}}, {"ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 494, "tid": 494, "ts": 1742522672426542, "dur": 1, "args": {"External id": 4251, "Ev Idx": 4250}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426542, "dur": 0, "args": {"External id": 4252, "Ev Idx": 4251}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672426543, "dur": 1, "args": {"External id": 4253, "Ev Idx": 4252}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426543, "dur": 1, "args": {"External id": 4254, "Ev Idx": 4253}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426544, "dur": 1, "args": {"External id": 4255, "Ev Idx": 4254}}, {"ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 494, "tid": 494, "ts": 1742522672426545, "dur": 1, "args": {"External id": 4256, "Ev Idx": 4255}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426545, "dur": 0, "args": {"External id": 4257, "Ev Idx": 4256}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672426546, "dur": 6, "args": {"External id": 4258, "Ev Idx": 4257}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_reshape_alias", "pid": 494, "tid": 494, "ts": 1742522672426551, "dur": 1, "args": {"External id": 4259, "Ev Idx": 4258}}, {"ph": "X", "cat": "cpu_op", "name": "aten::flatten", "pid": 494, "tid": 494, "ts": 1742522672426559, "dur": 1, "args": {"External id": 4260, "Ev Idx": 4259}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426560, "dur": 0, "args": {"External id": 4261, "Ev Idx": 4260}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426568, "dur": 3, "args": {"External id": 4262, "Ev Idx": 4261}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426592, "dur": 0, "args": {"External id": 4263, "Ev Idx": 4262}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672426607, "dur": 5, "args": {"External id": 4264, "Ev Idx": 4263}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672426608, "dur": 4, "args": {"External id": 4265, "Ev Idx": 4264}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426613, "dur": 2, "args": {"External id": 4266, "Ev Idx": 4265}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426616, "dur": 1, "args": {"External id": 4267, "Ev Idx": 4266}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426630, "dur": 3, "args": {"External id": 4268, "Ev Idx": 4267}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426748, "dur": 1, "args": {"External id": 4269, "Ev Idx": 4268}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426751, "dur": 1, "args": {"External id": 4270, "Ev Idx": 4269}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672426753, "dur": 5, "args": {"External id": 4271, "Ev Idx": 4270}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672426754, "dur": 4, "args": {"External id": 4272, "Ev Idx": 4271}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672426759, "dur": 3, "args": {"External id": 4273, "Ev Idx": 4272}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672426760, "dur": 2, "args": {"External id": 4274, "Ev Idx": 4273}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426825, "dur": 1, "args": {"External id": 4275, "Ev Idx": 4274}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426827, "dur": 0, "args": {"External id": 4276, "Ev Idx": 4275}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426850, "dur": 4, "args": {"External id": 4277, "Ev Idx": 4276}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426855, "dur": 1, "args": {"External id": 4278, "Ev Idx": 4277}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672426856, "dur": 1, "args": {"External id": 4279, "Ev Idx": 4278}}, {"ph": "X", "cat": "cpu_op", "name": "aten::normal_", "pid": 494, "tid": 494, "ts": 1742522672426903, "dur": 19, "args": {"External id": 4280, "Ev Idx": 4279}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672426934, "dur": 9, "args": {"External id": 4281, "Ev Idx": 4280}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426938, "dur": 1, "args": {"External id": 4282, "Ev Idx": 4281}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672426945, "dur": 2, "args": {"External id": 4283, "Ev Idx": 4282}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426946, "dur": 0, "args": {"External id": 4284, "Ev Idx": 4283}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426958, "dur": 4, "args": {"External id": 4285, "Ev Idx": 4284}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672426963, "dur": 2, "args": {"External id": 4286, "Ev Idx": 4285}}, {"ph": "X", "cat": "cpu_op", "name": "aten::chunk", "pid": 494, "tid": 494, "ts": 1742522672426988, "dur": 11, "args": {"External id": 4287, "Ev Idx": 4286}}, {"ph": "X", "cat": "cpu_op", "name": "aten::split", "pid": 494, "tid": 494, "ts": 1742522672426992, "dur": 7, "args": {"External id": 4288, "Ev Idx": 4287}}, {"ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 494, "tid": 494, "ts": 1742522672426994, "dur": 3, "args": {"External id": 4289, "Ev Idx": 4288}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426996, "dur": 1, "args": {"External id": 4290, "Ev Idx": 4289}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426996, "dur": 1, "args": {"External id": 4291, "Ev Idx": 4290}}, {"ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 494, "tid": 494, "ts": 1742522672426998, "dur": 1, "args": {"External id": 4292, "Ev Idx": 4291}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672426998, "dur": 1, "args": {"External id": 4293, "Ev Idx": 4292}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672426998, "dur": 0, "args": {"External id": 4294, "Ev Idx": 4293}}, {"ph": "X", "cat": "cpu_op", "name": "aten::chunk", "pid": 494, "tid": 494, "ts": 1742522672427000, "dur": 3, "args": {"External id": 4295, "Ev Idx": 4294}}, {"ph": "X", "cat": "cpu_op", "name": "aten::split", "pid": 494, "tid": 494, "ts": 1742522672427001, "dur": 2, "args": {"External id": 4296, "Ev Idx": 4295}}, {"ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 494, "tid": 494, "ts": 1742522672427001, "dur": 1, "args": {"External id": 4297, "Ev Idx": 4296}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672427001, "dur": 1, "args": {"External id": 4298, "Ev Idx": 4297}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427001, "dur": 0, "args": {"External id": 4299, "Ev Idx": 4298}}, {"ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 494, "tid": 494, "ts": 1742522672427002, "dur": 1, "args": {"External id": 4300, "Ev Idx": 4299}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672427002, "dur": 1, "args": {"External id": 4301, "Ev Idx": 4300}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427003, "dur": 0, "args": {"External id": 4302, "Ev Idx": 4301}}, {"ph": "X", "cat": "cpu_op", "name": "aten::chunk", "pid": 494, "tid": 494, "ts": 1742522672427004, "dur": 3, "args": {"External id": 4303, "Ev Idx": 4302}}, {"ph": "X", "cat": "cpu_op", "name": "aten::split", "pid": 494, "tid": 494, "ts": 1742522672427004, "dur": 3, "args": {"External id": 4304, "Ev Idx": 4303}}, {"ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 494, "tid": 494, "ts": 1742522672427005, "dur": 1, "args": {"External id": 4305, "Ev Idx": 4304}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672427005, "dur": 0, "args": {"External id": 4306, "Ev Idx": 4305}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427005, "dur": 0, "args": {"External id": 4307, "Ev Idx": 4306}}, {"ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 494, "tid": 494, "ts": 1742522672427006, "dur": 1, "args": {"External id": 4308, "Ev Idx": 4307}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672427006, "dur": 1, "args": {"External id": 4309, "Ev Idx": 4308}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427006, "dur": 1, "args": {"External id": 4310, "Ev Idx": 4309}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672427012, "dur": 2, "args": {"External id": 4311, "Ev Idx": 4310}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427013, "dur": 0, "args": {"External id": 4312, "Ev Idx": 4311}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672427015, "dur": 2, "args": {"External id": 4313, "Ev Idx": 4312}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427015, "dur": 1, "args": {"External id": 4314, "Ev Idx": 4313}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427037, "dur": 3, "args": {"External id": 4315, "Ev Idx": 4314}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427041, "dur": 3, "args": {"External id": 4316, "Ev Idx": 4315}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427044, "dur": 3, "args": {"External id": 4317, "Ev Idx": 4316}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427047, "dur": 3, "args": {"External id": 4318, "Ev Idx": 4317}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427050, "dur": 3, "args": {"External id": 4319, "Ev Idx": 4318}}, {"ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 494, "tid": 494, "ts": 1742522672427054, "dur": 1, "args": {"External id": 4320, "Ev Idx": 4319}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427054, "dur": 1, "args": {"External id": 4321, "Ev Idx": 4320}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672427118, "dur": 2, "args": {"External id": 4322, "Ev Idx": 4321}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427119, "dur": 0, "args": {"External id": 4323, "Ev Idx": 4322}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672427122, "dur": 1, "args": {"External id": 4324, "Ev Idx": 4323}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427122, "dur": 1, "args": {"External id": 4325, "Ev Idx": 4324}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427127, "dur": 4, "args": {"External id": 4326, "Ev Idx": 4325}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427131, "dur": 3, "args": {"External id": 4327, "Ev Idx": 4326}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427134, "dur": 3, "args": {"External id": 4328, "Ev Idx": 4327}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427138, "dur": 2, "args": {"External id": 4329, "Ev Idx": 4328}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427140, "dur": 3, "args": {"External id": 4330, "Ev Idx": 4329}}, {"ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 494, "tid": 494, "ts": 1742522672427144, "dur": 1, "args": {"External id": 4331, "Ev Idx": 4330}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427144, "dur": 1, "args": {"External id": 4332, "Ev Idx": 4331}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427176, "dur": 4, "args": {"External id": 4333, "Ev Idx": 4332}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427292, "dur": 4, "args": {"External id": 4334, "Ev Idx": 4333}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427297, "dur": 3, "args": {"External id": 4335, "Ev Idx": 4334}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427301, "dur": 1, "args": {"External id": 4336, "Ev Idx": 4335}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427318, "dur": 3, "args": {"External id": 4337, "Ev Idx": 4336}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427418, "dur": 3, "args": {"External id": 4338, "Ev Idx": 4337}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672427450, "dur": 5, "args": {"External id": 4339, "Ev Idx": 4338}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672427451, "dur": 4, "args": {"External id": 4340, "Ev Idx": 4339}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427456, "dur": 3, "args": {"External id": 4341, "Ev Idx": 4340}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427460, "dur": 1, "args": {"External id": 4342, "Ev Idx": 4341}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427474, "dur": 3, "args": {"External id": 4343, "Ev Idx": 4342}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427579, "dur": 4, "args": {"External id": 4344, "Ev Idx": 4343}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427583, "dur": 3, "args": {"External id": 4345, "Ev Idx": 4344}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427587, "dur": 0, "args": {"External id": 4346, "Ev Idx": 4345}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427609, "dur": 3, "args": {"External id": 4347, "Ev Idx": 4346}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427713, "dur": 3, "args": {"External id": 4348, "Ev Idx": 4347}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427790, "dur": 3, "args": {"External id": 4349, "Ev Idx": 4348}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427794, "dur": 3, "args": {"External id": 4350, "Ev Idx": 4349}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672427797, "dur": 1, "args": {"External id": 4351, "Ev Idx": 4350}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427811, "dur": 3, "args": {"External id": 4352, "Ev Idx": 4351}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672427899, "dur": 4, "args": {"External id": 4353, "Ev Idx": 4352}}, {"ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 494, "tid": 494, "ts": 1742522672427928, "dur": 16, "args": {"External id": 4354, "Ev Idx": 4353}}, {"ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 494, "tid": 494, "ts": 1742522672427958, "dur": 14, "args": {"External id": 4355, "Ev Idx": 4354}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672428006, "dur": 3, "args": {"External id": 4356, "Ev Idx": 4355}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672428012, "dur": 1, "args": {"External id": 4357, "Ev Idx": 4356}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428014, "dur": 6, "args": {"External id": 4358, "Ev Idx": 4357}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428015, "dur": 4, "args": {"External id": 4359, "Ev Idx": 4358}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428021, "dur": 3, "args": {"External id": 4360, "Ev Idx": 4359}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428021, "dur": 3, "args": {"External id": 4361, "Ev Idx": 4360}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672428086, "dur": 0, "args": {"External id": 4362, "Ev Idx": 4361}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672428088, "dur": 0, "args": {"External id": 4363, "Ev Idx": 4362}}, {"ph": "X", "cat": "cpu_op", "name": "aten::index", "pid": 494, "tid": 494, "ts": 1742522672428099, "dur": 25, "args": {"External id": 4364, "Ev Idx": 4363}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428103, "dur": 1, "args": {"External id": 4365, "Ev Idx": 4364}}, {"ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 494, "tid": 494, "ts": 1742522672428105, "dur": 1, "args": {"External id": 4366, "Ev Idx": 4365}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672428106, "dur": 0, "args": {"External id": 4367, "Ev Idx": 4366}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672428131, "dur": 4, "args": {"External id": 4368, "Ev Idx": 4367}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672428190, "dur": 2, "args": {"External id": 4369, "Ev Idx": 4368}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428191, "dur": 0, "args": {"External id": 4370, "Ev Idx": 4369}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unbind", "pid": 494, "tid": 494, "ts": 1742522672428199, "dur": 100, "args": {"External id": 4371, "Ev Idx": 4370}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428200, "dur": 2, "args": {"External id": 4372, "Ev Idx": 4371}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428201, "dur": 1, "args": {"External id": 4373, "Ev Idx": 4372}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428202, "dur": 1, "args": {"External id": 4374, "Ev Idx": 4373}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428203, "dur": 0, "args": {"External id": 4375, "Ev Idx": 4374}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428203, "dur": 1, "args": {"External id": 4376, "Ev Idx": 4375}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428203, "dur": 1, "args": {"External id": 4377, "Ev Idx": 4376}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428204, "dur": 0, "args": {"External id": 4378, "Ev Idx": 4377}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428204, "dur": 0, "args": {"External id": 4379, "Ev Idx": 4378}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428205, "dur": 0, "args": {"External id": 4380, "Ev Idx": 4379}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428205, "dur": 0, "args": {"External id": 4381, "Ev Idx": 4380}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428205, "dur": 1, "args": {"External id": 4382, "Ev Idx": 4381}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428206, "dur": 0, "args": {"External id": 4383, "Ev Idx": 4382}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428206, "dur": 1, "args": {"External id": 4384, "Ev Idx": 4383}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428206, "dur": 1, "args": {"External id": 4385, "Ev Idx": 4384}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428207, "dur": 1, "args": {"External id": 4386, "Ev Idx": 4385}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428207, "dur": 0, "args": {"External id": 4387, "Ev Idx": 4386}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428208, "dur": 0, "args": {"External id": 4388, "Ev Idx": 4387}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428208, "dur": 0, "args": {"External id": 4389, "Ev Idx": 4388}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428209, "dur": 0, "args": {"External id": 4390, "Ev Idx": 4389}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428209, "dur": 0, "args": {"External id": 4391, "Ev Idx": 4390}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428210, "dur": 0, "args": {"External id": 4392, "Ev Idx": 4391}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428210, "dur": 0, "args": {"External id": 4393, "Ev Idx": 4392}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428210, "dur": 1, "args": {"External id": 4394, "Ev Idx": 4393}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428211, "dur": 0, "args": {"External id": 4395, "Ev Idx": 4394}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428211, "dur": 1, "args": {"External id": 4396, "Ev Idx": 4395}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428211, "dur": 0, "args": {"External id": 4397, "Ev Idx": 4396}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428212, "dur": 0, "args": {"External id": 4398, "Ev Idx": 4397}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428212, "dur": 0, "args": {"External id": 4399, "Ev Idx": 4398}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428213, "dur": 0, "args": {"External id": 4400, "Ev Idx": 4399}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428213, "dur": 0, "args": {"External id": 4401, "Ev Idx": 4400}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428214, "dur": 0, "args": {"External id": 4402, "Ev Idx": 4401}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428214, "dur": 0, "args": {"External id": 4403, "Ev Idx": 4402}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428214, "dur": 1, "args": {"External id": 4404, "Ev Idx": 4403}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428215, "dur": 0, "args": {"External id": 4405, "Ev Idx": 4404}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428215, "dur": 1, "args": {"External id": 4406, "Ev Idx": 4405}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428215, "dur": 0, "args": {"External id": 4407, "Ev Idx": 4406}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428216, "dur": 0, "args": {"External id": 4408, "Ev Idx": 4407}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428216, "dur": 0, "args": {"External id": 4409, "Ev Idx": 4408}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428217, "dur": 0, "args": {"External id": 4410, "Ev Idx": 4409}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428217, "dur": 0, "args": {"External id": 4411, "Ev Idx": 4410}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428217, "dur": 1, "args": {"External id": 4412, "Ev Idx": 4411}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428218, "dur": 0, "args": {"External id": 4413, "Ev Idx": 4412}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428218, "dur": 1, "args": {"External id": 4414, "Ev Idx": 4413}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428218, "dur": 1, "args": {"External id": 4415, "Ev Idx": 4414}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428219, "dur": 1, "args": {"External id": 4416, "Ev Idx": 4415}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428219, "dur": 0, "args": {"External id": 4417, "Ev Idx": 4416}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428220, "dur": 0, "args": {"External id": 4418, "Ev Idx": 4417}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428220, "dur": 0, "args": {"External id": 4419, "Ev Idx": 4418}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428221, "dur": 0, "args": {"External id": 4420, "Ev Idx": 4419}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428221, "dur": 0, "args": {"External id": 4421, "Ev Idx": 4420}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428221, "dur": 1, "args": {"External id": 4422, "Ev Idx": 4421}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428222, "dur": 0, "args": {"External id": 4423, "Ev Idx": 4422}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428222, "dur": 1, "args": {"External id": 4424, "Ev Idx": 4423}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428222, "dur": 0, "args": {"External id": 4425, "Ev Idx": 4424}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428223, "dur": 0, "args": {"External id": 4426, "Ev Idx": 4425}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428223, "dur": 0, "args": {"External id": 4427, "Ev Idx": 4426}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428224, "dur": 0, "args": {"External id": 4428, "Ev Idx": 4427}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428224, "dur": 0, "args": {"External id": 4429, "Ev Idx": 4428}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428224, "dur": 1, "args": {"External id": 4430, "Ev Idx": 4429}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428225, "dur": 0, "args": {"External id": 4431, "Ev Idx": 4430}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428225, "dur": 1, "args": {"External id": 4432, "Ev Idx": 4431}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428225, "dur": 0, "args": {"External id": 4433, "Ev Idx": 4432}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428226, "dur": 0, "args": {"External id": 4434, "Ev Idx": 4433}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428226, "dur": 0, "args": {"External id": 4435, "Ev Idx": 4434}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428227, "dur": 0, "args": {"External id": 4436, "Ev Idx": 4435}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428227, "dur": 0, "args": {"External id": 4437, "Ev Idx": 4436}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428227, "dur": 1, "args": {"External id": 4438, "Ev Idx": 4437}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428228, "dur": 0, "args": {"External id": 4439, "Ev Idx": 4438}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428228, "dur": 1, "args": {"External id": 4440, "Ev Idx": 4439}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428228, "dur": 1, "args": {"External id": 4441, "Ev Idx": 4440}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428229, "dur": 0, "args": {"External id": 4442, "Ev Idx": 4441}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428229, "dur": 0, "args": {"External id": 4443, "Ev Idx": 4442}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428230, "dur": 0, "args": {"External id": 4444, "Ev Idx": 4443}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428230, "dur": 0, "args": {"External id": 4445, "Ev Idx": 4444}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428230, "dur": 1, "args": {"External id": 4446, "Ev Idx": 4445}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428231, "dur": 0, "args": {"External id": 4447, "Ev Idx": 4446}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428231, "dur": 0, "args": {"External id": 4448, "Ev Idx": 4447}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428231, "dur": 0, "args": {"External id": 4449, "Ev Idx": 4448}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428232, "dur": 0, "args": {"External id": 4450, "Ev Idx": 4449}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428232, "dur": 0, "args": {"External id": 4451, "Ev Idx": 4450}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428232, "dur": 1, "args": {"External id": 4452, "Ev Idx": 4451}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428233, "dur": 0, "args": {"External id": 4453, "Ev Idx": 4452}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428233, "dur": 1, "args": {"External id": 4454, "Ev Idx": 4453}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428233, "dur": 0, "args": {"External id": 4455, "Ev Idx": 4454}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428234, "dur": 0, "args": {"External id": 4456, "Ev Idx": 4455}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428234, "dur": 0, "args": {"External id": 4457, "Ev Idx": 4456}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428234, "dur": 1, "args": {"External id": 4458, "Ev Idx": 4457}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428235, "dur": 0, "args": {"External id": 4459, "Ev Idx": 4458}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428235, "dur": 1, "args": {"External id": 4460, "Ev Idx": 4459}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428235, "dur": 1, "args": {"External id": 4461, "Ev Idx": 4460}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428236, "dur": 0, "args": {"External id": 4462, "Ev Idx": 4461}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428236, "dur": 0, "args": {"External id": 4463, "Ev Idx": 4462}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428237, "dur": 0, "args": {"External id": 4464, "Ev Idx": 4463}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428237, "dur": 0, "args": {"External id": 4465, "Ev Idx": 4464}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428237, "dur": 1, "args": {"External id": 4466, "Ev Idx": 4465}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428238, "dur": 0, "args": {"External id": 4467, "Ev Idx": 4466}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428238, "dur": 1, "args": {"External id": 4468, "Ev Idx": 4467}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428239, "dur": 0, "args": {"External id": 4469, "Ev Idx": 4468}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428239, "dur": 0, "args": {"External id": 4470, "Ev Idx": 4469}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428239, "dur": 0, "args": {"External id": 4471, "Ev Idx": 4470}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428240, "dur": 0, "args": {"External id": 4472, "Ev Idx": 4471}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428240, "dur": 0, "args": {"External id": 4473, "Ev Idx": 4472}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428240, "dur": 1, "args": {"External id": 4474, "Ev Idx": 4473}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428241, "dur": 0, "args": {"External id": 4475, "Ev Idx": 4474}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428241, "dur": 1, "args": {"External id": 4476, "Ev Idx": 4475}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428241, "dur": 0, "args": {"External id": 4477, "Ev Idx": 4476}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428242, "dur": 0, "args": {"External id": 4478, "Ev Idx": 4477}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428242, "dur": 0, "args": {"External id": 4479, "Ev Idx": 4478}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428243, "dur": 0, "args": {"External id": 4480, "Ev Idx": 4479}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428243, "dur": 0, "args": {"External id": 4481, "Ev Idx": 4480}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428243, "dur": 1, "args": {"External id": 4482, "Ev Idx": 4481}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428243, "dur": 1, "args": {"External id": 4483, "Ev Idx": 4482}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428244, "dur": 0, "args": {"External id": 4484, "Ev Idx": 4483}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428244, "dur": 0, "args": {"External id": 4485, "Ev Idx": 4484}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428245, "dur": 0, "args": {"External id": 4486, "Ev Idx": 4485}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428245, "dur": 0, "args": {"External id": 4487, "Ev Idx": 4486}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428245, "dur": 1, "args": {"External id": 4488, "Ev Idx": 4487}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428246, "dur": 0, "args": {"External id": 4489, "Ev Idx": 4488}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428246, "dur": 0, "args": {"External id": 4490, "Ev Idx": 4489}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428246, "dur": 0, "args": {"External id": 4491, "Ev Idx": 4490}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428247, "dur": 0, "args": {"External id": 4492, "Ev Idx": 4491}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428247, "dur": 0, "args": {"External id": 4493, "Ev Idx": 4492}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428247, "dur": 1, "args": {"External id": 4494, "Ev Idx": 4493}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428248, "dur": 0, "args": {"External id": 4495, "Ev Idx": 4494}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428248, "dur": 1, "args": {"External id": 4496, "Ev Idx": 4495}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428249, "dur": 0, "args": {"External id": 4497, "Ev Idx": 4496}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428249, "dur": 1, "args": {"External id": 4498, "Ev Idx": 4497}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428249, "dur": 1, "args": {"External id": 4499, "Ev Idx": 4498}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428250, "dur": 1, "args": {"External id": 4500, "Ev Idx": 4499}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428250, "dur": 0, "args": {"External id": 4501, "Ev Idx": 4500}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428251, "dur": 0, "args": {"External id": 4502, "Ev Idx": 4501}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428251, "dur": 0, "args": {"External id": 4503, "Ev Idx": 4502}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428251, "dur": 1, "args": {"External id": 4504, "Ev Idx": 4503}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428252, "dur": 0, "args": {"External id": 4505, "Ev Idx": 4504}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428252, "dur": 1, "args": {"External id": 4506, "Ev Idx": 4505}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428252, "dur": 0, "args": {"External id": 4507, "Ev Idx": 4506}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428253, "dur": 0, "args": {"External id": 4508, "Ev Idx": 4507}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428253, "dur": 0, "args": {"External id": 4509, "Ev Idx": 4508}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428254, "dur": 0, "args": {"External id": 4510, "Ev Idx": 4509}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428254, "dur": 0, "args": {"External id": 4511, "Ev Idx": 4510}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428254, "dur": 1, "args": {"External id": 4512, "Ev Idx": 4511}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428254, "dur": 1, "args": {"External id": 4513, "Ev Idx": 4512}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428255, "dur": 0, "args": {"External id": 4514, "Ev Idx": 4513}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428255, "dur": 0, "args": {"External id": 4515, "Ev Idx": 4514}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428256, "dur": 0, "args": {"External id": 4516, "Ev Idx": 4515}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428256, "dur": 0, "args": {"External id": 4517, "Ev Idx": 4516}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428256, "dur": 1, "args": {"External id": 4518, "Ev Idx": 4517}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428257, "dur": 0, "args": {"External id": 4519, "Ev Idx": 4518}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428257, "dur": 0, "args": {"External id": 4520, "Ev Idx": 4519}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428257, "dur": 0, "args": {"External id": 4521, "Ev Idx": 4520}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428258, "dur": 0, "args": {"External id": 4522, "Ev Idx": 4521}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428258, "dur": 0, "args": {"External id": 4523, "Ev Idx": 4522}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428259, "dur": 0, "args": {"External id": 4524, "Ev Idx": 4523}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428259, "dur": 0, "args": {"External id": 4525, "Ev Idx": 4524}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428259, "dur": 1, "args": {"External id": 4526, "Ev Idx": 4525}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428260, "dur": 0, "args": {"External id": 4527, "Ev Idx": 4526}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428260, "dur": 0, "args": {"External id": 4528, "Ev Idx": 4527}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428260, "dur": 0, "args": {"External id": 4529, "Ev Idx": 4528}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428261, "dur": 0, "args": {"External id": 4530, "Ev Idx": 4529}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428261, "dur": 0, "args": {"External id": 4531, "Ev Idx": 4530}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428261, "dur": 1, "args": {"External id": 4532, "Ev Idx": 4531}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428262, "dur": 0, "args": {"External id": 4533, "Ev Idx": 4532}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428262, "dur": 1, "args": {"External id": 4534, "Ev Idx": 4533}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428262, "dur": 0, "args": {"External id": 4535, "Ev Idx": 4534}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428263, "dur": 0, "args": {"External id": 4536, "Ev Idx": 4535}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428263, "dur": 0, "args": {"External id": 4537, "Ev Idx": 4536}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428263, "dur": 1, "args": {"External id": 4538, "Ev Idx": 4537}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428264, "dur": 0, "args": {"External id": 4539, "Ev Idx": 4538}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428264, "dur": 1, "args": {"External id": 4540, "Ev Idx": 4539}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428264, "dur": 0, "args": {"External id": 4541, "Ev Idx": 4540}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428265, "dur": 0, "args": {"External id": 4542, "Ev Idx": 4541}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428265, "dur": 0, "args": {"External id": 4543, "Ev Idx": 4542}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428266, "dur": 0, "args": {"External id": 4544, "Ev Idx": 4543}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428266, "dur": 0, "args": {"External id": 4545, "Ev Idx": 4544}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428266, "dur": 1, "args": {"External id": 4546, "Ev Idx": 4545}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428266, "dur": 1, "args": {"External id": 4547, "Ev Idx": 4546}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428267, "dur": 0, "args": {"External id": 4548, "Ev Idx": 4547}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428267, "dur": 0, "args": {"External id": 4549, "Ev Idx": 4548}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428268, "dur": 1, "args": {"External id": 4550, "Ev Idx": 4549}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428268, "dur": 1, "args": {"External id": 4551, "Ev Idx": 4550}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428269, "dur": 1, "args": {"External id": 4552, "Ev Idx": 4551}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428270, "dur": 0, "args": {"External id": 4553, "Ev Idx": 4552}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428270, "dur": 1, "args": {"External id": 4554, "Ev Idx": 4553}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428271, "dur": 0, "args": {"External id": 4555, "Ev Idx": 4554}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428271, "dur": 1, "args": {"External id": 4556, "Ev Idx": 4555}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428271, "dur": 0, "args": {"External id": 4557, "Ev Idx": 4556}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428272, "dur": 1, "args": {"External id": 4558, "Ev Idx": 4557}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428272, "dur": 1, "args": {"External id": 4559, "Ev Idx": 4558}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428273, "dur": 0, "args": {"External id": 4560, "Ev Idx": 4559}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428273, "dur": 0, "args": {"External id": 4561, "Ev Idx": 4560}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428274, "dur": 0, "args": {"External id": 4562, "Ev Idx": 4561}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428274, "dur": 0, "args": {"External id": 4563, "Ev Idx": 4562}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428274, "dur": 1, "args": {"External id": 4564, "Ev Idx": 4563}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428275, "dur": 0, "args": {"External id": 4565, "Ev Idx": 4564}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428275, "dur": 0, "args": {"External id": 4566, "Ev Idx": 4565}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428275, "dur": 0, "args": {"External id": 4567, "Ev Idx": 4566}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428276, "dur": 0, "args": {"External id": 4568, "Ev Idx": 4567}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428276, "dur": 0, "args": {"External id": 4569, "Ev Idx": 4568}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428276, "dur": 1, "args": {"External id": 4570, "Ev Idx": 4569}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428277, "dur": 0, "args": {"External id": 4571, "Ev Idx": 4570}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428277, "dur": 0, "args": {"External id": 4572, "Ev Idx": 4571}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428277, "dur": 0, "args": {"External id": 4573, "Ev Idx": 4572}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428278, "dur": 0, "args": {"External id": 4574, "Ev Idx": 4573}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428278, "dur": 0, "args": {"External id": 4575, "Ev Idx": 4574}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428278, "dur": 1, "args": {"External id": 4576, "Ev Idx": 4575}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428279, "dur": 0, "args": {"External id": 4577, "Ev Idx": 4576}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428279, "dur": 1, "args": {"External id": 4578, "Ev Idx": 4577}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428279, "dur": 1, "args": {"External id": 4579, "Ev Idx": 4578}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428280, "dur": 1, "args": {"External id": 4580, "Ev Idx": 4579}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428280, "dur": 0, "args": {"External id": 4581, "Ev Idx": 4580}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428281, "dur": 0, "args": {"External id": 4582, "Ev Idx": 4581}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428281, "dur": 0, "args": {"External id": 4583, "Ev Idx": 4582}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428282, "dur": 0, "args": {"External id": 4584, "Ev Idx": 4583}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428282, "dur": 0, "args": {"External id": 4585, "Ev Idx": 4584}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428282, "dur": 1, "args": {"External id": 4586, "Ev Idx": 4585}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428282, "dur": 1, "args": {"External id": 4587, "Ev Idx": 4586}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428283, "dur": 0, "args": {"External id": 4588, "Ev Idx": 4587}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428283, "dur": 0, "args": {"External id": 4589, "Ev Idx": 4588}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428284, "dur": 0, "args": {"External id": 4590, "Ev Idx": 4589}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428284, "dur": 0, "args": {"External id": 4591, "Ev Idx": 4590}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428284, "dur": 1, "args": {"External id": 4592, "Ev Idx": 4591}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428285, "dur": 0, "args": {"External id": 4593, "Ev Idx": 4592}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428285, "dur": 0, "args": {"External id": 4594, "Ev Idx": 4593}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428285, "dur": 0, "args": {"External id": 4595, "Ev Idx": 4594}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428286, "dur": 0, "args": {"External id": 4596, "Ev Idx": 4595}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428286, "dur": 0, "args": {"External id": 4597, "Ev Idx": 4596}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428286, "dur": 1, "args": {"External id": 4598, "Ev Idx": 4597}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428287, "dur": 0, "args": {"External id": 4599, "Ev Idx": 4598}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428287, "dur": 0, "args": {"External id": 4600, "Ev Idx": 4599}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428287, "dur": 0, "args": {"External id": 4601, "Ev Idx": 4600}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428288, "dur": 0, "args": {"External id": 4602, "Ev Idx": 4601}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428288, "dur": 0, "args": {"External id": 4603, "Ev Idx": 4602}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428288, "dur": 1, "args": {"External id": 4604, "Ev Idx": 4603}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428289, "dur": 0, "args": {"External id": 4605, "Ev Idx": 4604}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428289, "dur": 1, "args": {"External id": 4606, "Ev Idx": 4605}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428289, "dur": 1, "args": {"External id": 4607, "Ev Idx": 4606}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428290, "dur": 3, "args": {"External id": 4608, "Ev Idx": 4607}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428293, "dur": 0, "args": {"External id": 4609, "Ev Idx": 4608}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428293, "dur": 1, "args": {"External id": 4610, "Ev Idx": 4609}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428293, "dur": 0, "args": {"External id": 4611, "Ev Idx": 4610}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428294, "dur": 0, "args": {"External id": 4612, "Ev Idx": 4611}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428294, "dur": 0, "args": {"External id": 4613, "Ev Idx": 4612}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428295, "dur": 0, "args": {"External id": 4614, "Ev Idx": 4613}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428295, "dur": 0, "args": {"External id": 4615, "Ev Idx": 4614}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428295, "dur": 1, "args": {"External id": 4616, "Ev Idx": 4615}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428295, "dur": 1, "args": {"External id": 4617, "Ev Idx": 4616}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428296, "dur": 0, "args": {"External id": 4618, "Ev Idx": 4617}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428296, "dur": 0, "args": {"External id": 4619, "Ev Idx": 4618}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428297, "dur": 0, "args": {"External id": 4620, "Ev Idx": 4619}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428297, "dur": 0, "args": {"External id": 4621, "Ev Idx": 4620}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428297, "dur": 1, "args": {"External id": 4622, "Ev Idx": 4621}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428298, "dur": 0, "args": {"External id": 4623, "Ev Idx": 4622}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428298, "dur": 0, "args": {"External id": 4624, "Ev Idx": 4623}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428298, "dur": 0, "args": {"External id": 4625, "Ev Idx": 4624}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428299, "dur": 0, "args": {"External id": 4626, "Ev Idx": 4625}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428299, "dur": 0, "args": {"External id": 4627, "Ev Idx": 4626}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672428352, "dur": 5, "args": {"External id": 4628, "Ev Idx": 4627}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672428358, "dur": 2, "args": {"External id": 4629, "Ev Idx": 4628}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sort", "pid": 494, "tid": 494, "ts": 1742522672428373, "dur": 65, "args": {"External id": 4630, "Ev Idx": 4629}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sort", "pid": 494, "tid": 494, "ts": 1742522672428374, "dur": 64, "args": {"External id": 4631, "Ev Idx": 4630}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428380, "dur": 14, "args": {"External id": 4632, "Ev Idx": 4631}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672428395, "dur": 17, "args": {"External id": 4633, "Ev Idx": 4632}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672428397, "dur": 2, "args": {"External id": 4634, "Ev Idx": 4633}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672428400, "dur": 12, "args": {"External id": 4635, "Ev Idx": 4634}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672428402, "dur": 3, "args": {"External id": 4636, "Ev Idx": 4635}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672428412, "dur": 1, "args": {"External id": 4637, "Ev Idx": 4636}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428414, "dur": 8, "args": {"External id": 4638, "Ev Idx": 4637}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428424, "dur": 6, "args": {"External id": 4639, "Ev Idx": 4638}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672428452, "dur": 11, "args": {"External id": 4640, "Ev Idx": 4639}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672428452, "dur": 3, "args": {"External id": 4641, "Ev Idx": 4640}}, {"ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 494, "tid": 494, "ts": 1742522672428455, "dur": 8, "args": {"External id": 4642, "Ev Idx": 4641}}, {"ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 494, "tid": 494, "ts": 1742522672428456, "dur": 2, "args": {"External id": 4643, "Ev Idx": 4642}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 494, "tid": 494, "ts": 1742522672428466, "dur": 3, "args": {"External id": 4644, "Ev Idx": 4643}}, {"ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 494, "tid": 494, "ts": 1742522672428466, "dur": 3, "args": {"External id": 4645, "Ev Idx": 4644}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428468, "dur": 0, "args": {"External id": 4646, "Ev Idx": 4645}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428471, "dur": 4, "args": {"External id": 4647, "Ev Idx": 4646}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428472, "dur": 3, "args": {"External id": 4648, "Ev Idx": 4647}}, {"ph": "X", "cat": "cpu_op", "name": "aten::scatter_", "pid": 494, "tid": 494, "ts": 1742522672428477, "dur": 13, "args": {"External id": 4649, "Ev Idx": 4648}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428481, "dur": 0, "args": {"External id": 4650, "Ev Idx": 4649}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428482, "dur": 0, "args": {"External id": 4651, "Ev Idx": 4650}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672428493, "dur": 12, "args": {"External id": 4652, "Ev Idx": 4651}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428497, "dur": 0, "args": {"External id": 4653, "Ev Idx": 4652}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428497, "dur": 1, "args": {"External id": 4654, "Ev Idx": 4653}}, {"ph": "X", "cat": "cpu_op", "name": "aten::softmax", "pid": 494, "tid": 494, "ts": 1742522672428510, "dur": 10, "args": {"External id": 4655, "Ev Idx": 4654}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428510, "dur": 1, "args": {"External id": 4656, "Ev Idx": 4655}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_softmax", "pid": 494, "tid": 494, "ts": 1742522672428512, "dur": 8, "args": {"External id": 4657, "Ev Idx": 4656}}, {"ph": "X", "cat": "cpu_op", "name": "aten::log", "pid": 494, "tid": 494, "ts": 1742522672428522, "dur": 10, "args": {"External id": 4658, "Ev Idx": 4657}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672428536, "dur": 1, "args": {"External id": 4659, "Ev Idx": 4658}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428537, "dur": 0, "args": {"External id": 4660, "Ev Idx": 4659}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428538, "dur": 2, "args": {"External id": 4661, "Ev Idx": 4660}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428540, "dur": 0, "args": {"External id": 4662, "Ev Idx": 4661}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428541, "dur": 9, "args": {"External id": 4663, "Ev Idx": 4662}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428554, "dur": 3, "args": {"External id": 4664, "Ev Idx": 4663}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428554, "dur": 3, "args": {"External id": 4665, "Ev Idx": 4664}}, {"ph": "X", "cat": "cpu_op", "name": "aten::exponential_", "pid": 494, "tid": 494, "ts": 1742522672428559, "dur": 12, "args": {"External id": 4666, "Ev Idx": 4665}}, {"ph": "X", "cat": "cpu_op", "name": "aten::div_", "pid": 494, "tid": 494, "ts": 1742522672428572, "dur": 9, "args": {"External id": 4667, "Ev Idx": 4666}}, {"ph": "X", "cat": "cpu_op", "name": "aten::argmax", "pid": 494, "tid": 494, "ts": 1742522672428583, "dur": 13, "args": {"External id": 4668, "Ev Idx": 4667}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428587, "dur": 0, "args": {"External id": 4669, "Ev Idx": 4668}}, {"ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 494, "tid": 494, "ts": 1742522672428601, "dur": 1, "args": {"External id": 4670, "Ev Idx": 4669}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428601, "dur": 1, "args": {"External id": 4671, "Ev Idx": 4670}}, {"ph": "X", "cat": "cpu_op", "name": "aten::gather", "pid": 494, "tid": 494, "ts": 1742522672428604, "dur": 11, "args": {"External id": 4672, "Ev Idx": 4671}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428607, "dur": 0, "args": {"External id": 4673, "Ev Idx": 4672}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428607, "dur": 1, "args": {"External id": 4674, "Ev Idx": 4673}}, {"ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 494, "tid": 494, "ts": 1742522672428617, "dur": 1, "args": {"External id": 4675, "Ev Idx": 4674}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428618, "dur": 0, "args": {"External id": 4676, "Ev Idx": 4675}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672428621, "dur": 1, "args": {"External id": 4677, "Ev Idx": 4676}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428622, "dur": 0, "args": {"External id": 4678, "Ev Idx": 4677}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428623, "dur": 1, "args": {"External id": 4679, "Ev Idx": 4678}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428623, "dur": 1, "args": {"External id": 4680, "Ev Idx": 4679}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428624, "dur": 9, "args": {"External id": 4681, "Ev Idx": 4680}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672428636, "dur": 0, "args": {"External id": 4682, "Ev Idx": 4681}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428636, "dur": 0, "args": {"External id": 4683, "Ev Idx": 4682}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428637, "dur": 1, "args": {"External id": 4684, "Ev Idx": 4683}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428638, "dur": 0, "args": {"External id": 4685, "Ev Idx": 4684}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428639, "dur": 6, "args": {"External id": 4686, "Ev Idx": 4685}}, {"ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 494, "tid": 494, "ts": 1742522672428659, "dur": 18, "args": {"External id": 4687, "Ev Idx": 4686}}, {"ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 494, "tid": 494, "ts": 1742522672428660, "dur": 15, "args": {"External id": 4688, "Ev Idx": 4687}}, {"ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 494, "tid": 494, "ts": 1742522672428666, "dur": 2, "args": {"External id": 4689, "Ev Idx": 4688}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672428667, "dur": 1, "args": {"External id": 4690, "Ev Idx": 4689}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428668, "dur": 0, "args": {"External id": 4691, "Ev Idx": 4690}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672428676, "dur": 1, "args": {"External id": 4692, "Ev Idx": 4691}}, {"ph": "X", "cat": "cpu_op", "name": "aten::flatten", "pid": 494, "tid": 494, "ts": 1742522672428679, "dur": 1, "args": {"External id": 4693, "Ev Idx": 4692}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672428679, "dur": 0, "args": {"External id": 4694, "Ev Idx": 4693}}, {"ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 494, "tid": 494, "ts": 1742522672428681, "dur": 13, "args": {"External id": 4695, "Ev Idx": 4694}}, {"ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 494, "tid": 494, "ts": 1742522672428682, "dur": 11, "args": {"External id": 4696, "Ev Idx": 4695}}, {"ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 494, "tid": 494, "ts": 1742522672428685, "dur": 2, "args": {"External id": 4697, "Ev Idx": 4696}}, {"ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 494, "tid": 494, "ts": 1742522672428686, "dur": 0, "args": {"External id": 4698, "Ev Idx": 4697}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428686, "dur": 0, "args": {"External id": 4699, "Ev Idx": 4698}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672428694, "dur": 0, "args": {"External id": 4700, "Ev Idx": 4699}}, {"ph": "X", "cat": "cpu_op", "name": "aten::flatten", "pid": 494, "tid": 494, "ts": 1742522672428695, "dur": 1, "args": {"External id": 4701, "Ev Idx": 4700}}, {"ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 494, "tid": 494, "ts": 1742522672428695, "dur": 1, "args": {"External id": 4702, "Ev Idx": 4701}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428699, "dur": 17, "args": {"External id": 4703, "Ev Idx": 4702}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672428700, "dur": 16, "args": {"External id": 4704, "Ev Idx": 4703}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428701, "dur": 5, "args": {"External id": 4705, "Ev Idx": 4704}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428706, "dur": 10, "args": {"External id": 4706, "Ev Idx": 4705}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428719, "dur": 9, "args": {"External id": 4707, "Ev Idx": 4706}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672428719, "dur": 9, "args": {"External id": 4708, "Ev Idx": 4707}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428720, "dur": 2, "args": {"External id": 4709, "Ev Idx": 4708}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428722, "dur": 6, "args": {"External id": 4710, "Ev Idx": 4709}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428730, "dur": 9, "args": {"External id": 4711, "Ev Idx": 4710}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672428731, "dur": 8, "args": {"External id": 4712, "Ev Idx": 4711}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428731, "dur": 2, "args": {"External id": 4713, "Ev Idx": 4712}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428733, "dur": 6, "args": {"External id": 4714, "Ev Idx": 4713}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428741, "dur": 8, "args": {"External id": 4715, "Ev Idx": 4714}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672428742, "dur": 7, "args": {"External id": 4716, "Ev Idx": 4715}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428742, "dur": 1, "args": {"External id": 4717, "Ev Idx": 4716}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428744, "dur": 5, "args": {"External id": 4718, "Ev Idx": 4717}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428751, "dur": 7, "args": {"External id": 4719, "Ev Idx": 4718}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672428751, "dur": 7, "args": {"External id": 4720, "Ev Idx": 4719}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428752, "dur": 1, "args": {"External id": 4721, "Ev Idx": 4720}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428753, "dur": 5, "args": {"External id": 4722, "Ev Idx": 4721}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 494, "tid": 494, "ts": 1742522672428760, "dur": 16, "args": {"External id": 4723, "Ev Idx": 4722}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 494, "tid": 494, "ts": 1742522672428761, "dur": 15, "args": {"External id": 4724, "Ev Idx": 4723}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428765, "dur": 0, "args": {"External id": 4725, "Ev Idx": 4724}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428778, "dur": 9, "args": {"External id": 4726, "Ev Idx": 4725}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672428778, "dur": 9, "args": {"External id": 4727, "Ev Idx": 4726}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428778, "dur": 2, "args": {"External id": 4728, "Ev Idx": 4727}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428780, "dur": 7, "args": {"External id": 4729, "Ev Idx": 4728}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428789, "dur": 8, "args": {"External id": 4730, "Ev Idx": 4729}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672428790, "dur": 7, "args": {"External id": 4731, "Ev Idx": 4730}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428790, "dur": 2, "args": {"External id": 4732, "Ev Idx": 4731}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428792, "dur": 5, "args": {"External id": 4733, "Ev Idx": 4732}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428799, "dur": 8, "args": {"External id": 4734, "Ev Idx": 4733}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672428800, "dur": 7, "args": {"External id": 4735, "Ev Idx": 4734}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428800, "dur": 1, "args": {"External id": 4736, "Ev Idx": 4735}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428802, "dur": 4, "args": {"External id": 4737, "Ev Idx": 4736}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428809, "dur": 7, "args": {"External id": 4738, "Ev Idx": 4737}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672428809, "dur": 7, "args": {"External id": 4739, "Ev Idx": 4738}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428809, "dur": 2, "args": {"External id": 4740, "Ev Idx": 4739}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428811, "dur": 5, "args": {"External id": 4741, "Ev Idx": 4740}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428819, "dur": 13, "args": {"External id": 4742, "Ev Idx": 4741}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672428819, "dur": 12, "args": {"External id": 4743, "Ev Idx": 4742}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428819, "dur": 4, "args": {"External id": 4744, "Ev Idx": 4743}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428823, "dur": 8, "args": {"External id": 4745, "Ev Idx": 4744}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672428833, "dur": 8, "args": {"External id": 4746, "Ev Idx": 4745}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672428833, "dur": 8, "args": {"External id": 4747, "Ev Idx": 4746}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428834, "dur": 1, "args": {"External id": 4748, "Ev Idx": 4747}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672428836, "dur": 5, "args": {"External id": 4749, "Ev Idx": 4748}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672428858, "dur": 2, "args": {"External id": 4750, "Ev Idx": 4749}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672428859, "dur": 1, "args": {"External id": 4751, "Ev Idx": 4750}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428862, "dur": 3, "args": {"External id": 4752, "Ev Idx": 4751}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428862, "dur": 3, "args": {"External id": 4753, "Ev Idx": 4752}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428866, "dur": 3, "args": {"External id": 4754, "Ev Idx": 4753}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428866, "dur": 3, "args": {"External id": 4755, "Ev Idx": 4754}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428870, "dur": 3, "args": {"External id": 4756, "Ev Idx": 4755}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428870, "dur": 2, "args": {"External id": 4757, "Ev Idx": 4756}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428873, "dur": 3, "args": {"External id": 4758, "Ev Idx": 4757}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428873, "dur": 3, "args": {"External id": 4759, "Ev Idx": 4758}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428876, "dur": 3, "args": {"External id": 4760, "Ev Idx": 4759}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428876, "dur": 3, "args": {"External id": 4761, "Ev Idx": 4760}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428879, "dur": 3, "args": {"External id": 4762, "Ev Idx": 4761}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428879, "dur": 3, "args": {"External id": 4763, "Ev Idx": 4762}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428882, "dur": 3, "args": {"External id": 4764, "Ev Idx": 4763}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428883, "dur": 2, "args": {"External id": 4765, "Ev Idx": 4764}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428886, "dur": 2, "args": {"External id": 4766, "Ev Idx": 4765}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428886, "dur": 2, "args": {"External id": 4767, "Ev Idx": 4766}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428889, "dur": 2, "args": {"External id": 4768, "Ev Idx": 4767}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428889, "dur": 2, "args": {"External id": 4769, "Ev Idx": 4768}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428892, "dur": 3, "args": {"External id": 4770, "Ev Idx": 4769}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428892, "dur": 3, "args": {"External id": 4771, "Ev Idx": 4770}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428895, "dur": 3, "args": {"External id": 4772, "Ev Idx": 4771}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428895, "dur": 3, "args": {"External id": 4773, "Ev Idx": 4772}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428898, "dur": 3, "args": {"External id": 4774, "Ev Idx": 4773}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428898, "dur": 3, "args": {"External id": 4775, "Ev Idx": 4774}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428902, "dur": 2, "args": {"External id": 4776, "Ev Idx": 4775}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428902, "dur": 2, "args": {"External id": 4777, "Ev Idx": 4776}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428905, "dur": 3, "args": {"External id": 4778, "Ev Idx": 4777}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428905, "dur": 2, "args": {"External id": 4779, "Ev Idx": 4778}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428908, "dur": 3, "args": {"External id": 4780, "Ev Idx": 4779}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428908, "dur": 3, "args": {"External id": 4781, "Ev Idx": 4780}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428911, "dur": 3, "args": {"External id": 4782, "Ev Idx": 4781}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428911, "dur": 3, "args": {"External id": 4783, "Ev Idx": 4782}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428914, "dur": 3, "args": {"External id": 4784, "Ev Idx": 4783}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428915, "dur": 2, "args": {"External id": 4785, "Ev Idx": 4784}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428918, "dur": 2, "args": {"External id": 4786, "Ev Idx": 4785}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428918, "dur": 2, "args": {"External id": 4787, "Ev Idx": 4786}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428921, "dur": 3, "args": {"External id": 4788, "Ev Idx": 4787}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428921, "dur": 3, "args": {"External id": 4789, "Ev Idx": 4788}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428924, "dur": 4, "args": {"External id": 4790, "Ev Idx": 4789}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428925, "dur": 3, "args": {"External id": 4791, "Ev Idx": 4790}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428929, "dur": 2, "args": {"External id": 4792, "Ev Idx": 4791}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428929, "dur": 2, "args": {"External id": 4793, "Ev Idx": 4792}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428932, "dur": 2, "args": {"External id": 4794, "Ev Idx": 4793}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428932, "dur": 2, "args": {"External id": 4795, "Ev Idx": 4794}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428935, "dur": 3, "args": {"External id": 4796, "Ev Idx": 4795}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428935, "dur": 3, "args": {"External id": 4797, "Ev Idx": 4796}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428938, "dur": 3, "args": {"External id": 4798, "Ev Idx": 4797}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428938, "dur": 3, "args": {"External id": 4799, "Ev Idx": 4798}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428941, "dur": 3, "args": {"External id": 4800, "Ev Idx": 4799}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428942, "dur": 2, "args": {"External id": 4801, "Ev Idx": 4800}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428944, "dur": 3, "args": {"External id": 4802, "Ev Idx": 4801}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428945, "dur": 2, "args": {"External id": 4803, "Ev Idx": 4802}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428948, "dur": 3, "args": {"External id": 4804, "Ev Idx": 4803}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428948, "dur": 2, "args": {"External id": 4805, "Ev Idx": 4804}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428951, "dur": 3, "args": {"External id": 4806, "Ev Idx": 4805}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428951, "dur": 3, "args": {"External id": 4807, "Ev Idx": 4806}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428954, "dur": 3, "args": {"External id": 4808, "Ev Idx": 4807}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428954, "dur": 3, "args": {"External id": 4809, "Ev Idx": 4808}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428957, "dur": 3, "args": {"External id": 4810, "Ev Idx": 4809}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428958, "dur": 2, "args": {"External id": 4811, "Ev Idx": 4810}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428961, "dur": 3, "args": {"External id": 4812, "Ev Idx": 4811}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428961, "dur": 2, "args": {"External id": 4813, "Ev Idx": 4812}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428964, "dur": 3, "args": {"External id": 4814, "Ev Idx": 4813}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428964, "dur": 3, "args": {"External id": 4815, "Ev Idx": 4814}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428967, "dur": 3, "args": {"External id": 4816, "Ev Idx": 4815}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428967, "dur": 3, "args": {"External id": 4817, "Ev Idx": 4816}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428970, "dur": 3, "args": {"External id": 4818, "Ev Idx": 4817}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428970, "dur": 3, "args": {"External id": 4819, "Ev Idx": 4818}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428973, "dur": 4, "args": {"External id": 4820, "Ev Idx": 4819}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428973, "dur": 3, "args": {"External id": 4821, "Ev Idx": 4820}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428977, "dur": 3, "args": {"External id": 4822, "Ev Idx": 4821}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428977, "dur": 3, "args": {"External id": 4823, "Ev Idx": 4822}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428980, "dur": 3, "args": {"External id": 4824, "Ev Idx": 4823}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428981, "dur": 2, "args": {"External id": 4825, "Ev Idx": 4824}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428984, "dur": 2, "args": {"External id": 4826, "Ev Idx": 4825}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428984, "dur": 2, "args": {"External id": 4827, "Ev Idx": 4826}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428987, "dur": 2, "args": {"External id": 4828, "Ev Idx": 4827}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428987, "dur": 2, "args": {"External id": 4829, "Ev Idx": 4828}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428990, "dur": 3, "args": {"External id": 4830, "Ev Idx": 4829}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428990, "dur": 3, "args": {"External id": 4831, "Ev Idx": 4830}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428993, "dur": 3, "args": {"External id": 4832, "Ev Idx": 4831}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428993, "dur": 3, "args": {"External id": 4833, "Ev Idx": 4832}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672428996, "dur": 3, "args": {"External id": 4834, "Ev Idx": 4833}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672428997, "dur": 2, "args": {"External id": 4835, "Ev Idx": 4834}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429000, "dur": 3, "args": {"External id": 4836, "Ev Idx": 4835}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429000, "dur": 3, "args": {"External id": 4837, "Ev Idx": 4836}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429004, "dur": 3, "args": {"External id": 4838, "Ev Idx": 4837}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429004, "dur": 2, "args": {"External id": 4839, "Ev Idx": 4838}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429007, "dur": 3, "args": {"External id": 4840, "Ev Idx": 4839}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429007, "dur": 2, "args": {"External id": 4841, "Ev Idx": 4840}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429010, "dur": 3, "args": {"External id": 4842, "Ev Idx": 4841}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429010, "dur": 3, "args": {"External id": 4843, "Ev Idx": 4842}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429013, "dur": 3, "args": {"External id": 4844, "Ev Idx": 4843}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429013, "dur": 3, "args": {"External id": 4845, "Ev Idx": 4844}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429017, "dur": 3, "args": {"External id": 4846, "Ev Idx": 4845}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429017, "dur": 2, "args": {"External id": 4847, "Ev Idx": 4846}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429020, "dur": 3, "args": {"External id": 4848, "Ev Idx": 4847}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429020, "dur": 3, "args": {"External id": 4849, "Ev Idx": 4848}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429023, "dur": 3, "args": {"External id": 4850, "Ev Idx": 4849}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429024, "dur": 2, "args": {"External id": 4851, "Ev Idx": 4850}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429026, "dur": 4, "args": {"External id": 4852, "Ev Idx": 4851}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429027, "dur": 2, "args": {"External id": 4853, "Ev Idx": 4852}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429030, "dur": 3, "args": {"External id": 4854, "Ev Idx": 4853}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429030, "dur": 2, "args": {"External id": 4855, "Ev Idx": 4854}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429033, "dur": 3, "args": {"External id": 4856, "Ev Idx": 4855}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429033, "dur": 3, "args": {"External id": 4857, "Ev Idx": 4856}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429036, "dur": 3, "args": {"External id": 4858, "Ev Idx": 4857}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429036, "dur": 3, "args": {"External id": 4859, "Ev Idx": 4858}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429039, "dur": 4, "args": {"External id": 4860, "Ev Idx": 4859}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429040, "dur": 2, "args": {"External id": 4861, "Ev Idx": 4860}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429043, "dur": 3, "args": {"External id": 4862, "Ev Idx": 4861}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429043, "dur": 2, "args": {"External id": 4863, "Ev Idx": 4862}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429046, "dur": 3, "args": {"External id": 4864, "Ev Idx": 4863}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429046, "dur": 3, "args": {"External id": 4865, "Ev Idx": 4864}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429049, "dur": 3, "args": {"External id": 4866, "Ev Idx": 4865}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429049, "dur": 3, "args": {"External id": 4867, "Ev Idx": 4866}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429052, "dur": 3, "args": {"External id": 4868, "Ev Idx": 4867}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429053, "dur": 2, "args": {"External id": 4869, "Ev Idx": 4868}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429055, "dur": 3, "args": {"External id": 4870, "Ev Idx": 4869}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429056, "dur": 2, "args": {"External id": 4871, "Ev Idx": 4870}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429059, "dur": 3, "args": {"External id": 4872, "Ev Idx": 4871}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429059, "dur": 3, "args": {"External id": 4873, "Ev Idx": 4872}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429062, "dur": 3, "args": {"External id": 4874, "Ev Idx": 4873}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429062, "dur": 3, "args": {"External id": 4875, "Ev Idx": 4874}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429065, "dur": 3, "args": {"External id": 4876, "Ev Idx": 4875}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429066, "dur": 2, "args": {"External id": 4877, "Ev Idx": 4876}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429069, "dur": 2, "args": {"External id": 4878, "Ev Idx": 4877}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429069, "dur": 2, "args": {"External id": 4879, "Ev Idx": 4878}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429072, "dur": 3, "args": {"External id": 4880, "Ev Idx": 4879}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429072, "dur": 3, "args": {"External id": 4881, "Ev Idx": 4880}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429075, "dur": 3, "args": {"External id": 4882, "Ev Idx": 4881}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429075, "dur": 3, "args": {"External id": 4883, "Ev Idx": 4882}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429079, "dur": 2, "args": {"External id": 4884, "Ev Idx": 4883}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429079, "dur": 2, "args": {"External id": 4885, "Ev Idx": 4884}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429082, "dur": 2, "args": {"External id": 4886, "Ev Idx": 4885}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429082, "dur": 2, "args": {"External id": 4887, "Ev Idx": 4886}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429085, "dur": 2, "args": {"External id": 4888, "Ev Idx": 4887}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429085, "dur": 2, "args": {"External id": 4889, "Ev Idx": 4888}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429088, "dur": 3, "args": {"External id": 4890, "Ev Idx": 4889}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429088, "dur": 2, "args": {"External id": 4891, "Ev Idx": 4890}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429091, "dur": 3, "args": {"External id": 4892, "Ev Idx": 4891}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429091, "dur": 3, "args": {"External id": 4893, "Ev Idx": 4892}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429094, "dur": 3, "args": {"External id": 4894, "Ev Idx": 4893}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429094, "dur": 3, "args": {"External id": 4895, "Ev Idx": 4894}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429097, "dur": 3, "args": {"External id": 4896, "Ev Idx": 4895}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429097, "dur": 3, "args": {"External id": 4897, "Ev Idx": 4896}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429100, "dur": 3, "args": {"External id": 4898, "Ev Idx": 4897}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429101, "dur": 2, "args": {"External id": 4899, "Ev Idx": 4898}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429104, "dur": 2, "args": {"External id": 4900, "Ev Idx": 4899}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429104, "dur": 2, "args": {"External id": 4901, "Ev Idx": 4900}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429107, "dur": 2, "args": {"External id": 4902, "Ev Idx": 4901}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429107, "dur": 2, "args": {"External id": 4903, "Ev Idx": 4902}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429110, "dur": 2, "args": {"External id": 4904, "Ev Idx": 4903}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429110, "dur": 2, "args": {"External id": 4905, "Ev Idx": 4904}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429113, "dur": 3, "args": {"External id": 4906, "Ev Idx": 4905}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429113, "dur": 3, "args": {"External id": 4907, "Ev Idx": 4906}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429116, "dur": 3, "args": {"External id": 4908, "Ev Idx": 4907}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429116, "dur": 3, "args": {"External id": 4909, "Ev Idx": 4908}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429120, "dur": 2, "args": {"External id": 4910, "Ev Idx": 4909}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429120, "dur": 2, "args": {"External id": 4911, "Ev Idx": 4910}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429123, "dur": 3, "args": {"External id": 4912, "Ev Idx": 4911}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429123, "dur": 3, "args": {"External id": 4913, "Ev Idx": 4912}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429126, "dur": 3, "args": {"External id": 4914, "Ev Idx": 4913}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429126, "dur": 3, "args": {"External id": 4915, "Ev Idx": 4914}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429129, "dur": 3, "args": {"External id": 4916, "Ev Idx": 4915}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429129, "dur": 3, "args": {"External id": 4917, "Ev Idx": 4916}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429133, "dur": 3, "args": {"External id": 4918, "Ev Idx": 4917}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429133, "dur": 2, "args": {"External id": 4919, "Ev Idx": 4918}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429136, "dur": 3, "args": {"External id": 4920, "Ev Idx": 4919}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429136, "dur": 3, "args": {"External id": 4921, "Ev Idx": 4920}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429139, "dur": 3, "args": {"External id": 4922, "Ev Idx": 4921}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429139, "dur": 3, "args": {"External id": 4923, "Ev Idx": 4922}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429142, "dur": 4, "args": {"External id": 4924, "Ev Idx": 4923}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429143, "dur": 2, "args": {"External id": 4925, "Ev Idx": 4924}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429146, "dur": 3, "args": {"External id": 4926, "Ev Idx": 4925}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429146, "dur": 3, "args": {"External id": 4927, "Ev Idx": 4926}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429149, "dur": 3, "args": {"External id": 4928, "Ev Idx": 4927}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429149, "dur": 3, "args": {"External id": 4929, "Ev Idx": 4928}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429152, "dur": 3, "args": {"External id": 4930, "Ev Idx": 4929}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429152, "dur": 3, "args": {"External id": 4931, "Ev Idx": 4930}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429155, "dur": 3, "args": {"External id": 4932, "Ev Idx": 4931}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429155, "dur": 3, "args": {"External id": 4933, "Ev Idx": 4932}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429158, "dur": 3, "args": {"External id": 4934, "Ev Idx": 4933}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429158, "dur": 3, "args": {"External id": 4935, "Ev Idx": 4934}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429161, "dur": 3, "args": {"External id": 4936, "Ev Idx": 4935}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429162, "dur": 2, "args": {"External id": 4937, "Ev Idx": 4936}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429165, "dur": 3, "args": {"External id": 4938, "Ev Idx": 4937}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429165, "dur": 3, "args": {"External id": 4939, "Ev Idx": 4938}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429168, "dur": 3, "args": {"External id": 4940, "Ev Idx": 4939}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429168, "dur": 3, "args": {"External id": 4941, "Ev Idx": 4940}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429171, "dur": 3, "args": {"External id": 4942, "Ev Idx": 4941}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429172, "dur": 2, "args": {"External id": 4943, "Ev Idx": 4942}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429174, "dur": 3, "args": {"External id": 4944, "Ev Idx": 4943}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429175, "dur": 2, "args": {"External id": 4945, "Ev Idx": 4944}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429178, "dur": 3, "args": {"External id": 4946, "Ev Idx": 4945}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429178, "dur": 2, "args": {"External id": 4947, "Ev Idx": 4946}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429181, "dur": 3, "args": {"External id": 4948, "Ev Idx": 4947}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429181, "dur": 3, "args": {"External id": 4949, "Ev Idx": 4948}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429184, "dur": 3, "args": {"External id": 4950, "Ev Idx": 4949}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429184, "dur": 3, "args": {"External id": 4951, "Ev Idx": 4950}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429188, "dur": 2, "args": {"External id": 4952, "Ev Idx": 4951}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429188, "dur": 2, "args": {"External id": 4953, "Ev Idx": 4952}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429191, "dur": 3, "args": {"External id": 4954, "Ev Idx": 4953}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429191, "dur": 2, "args": {"External id": 4955, "Ev Idx": 4954}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429194, "dur": 3, "args": {"External id": 4956, "Ev Idx": 4955}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429194, "dur": 2, "args": {"External id": 4957, "Ev Idx": 4956}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429197, "dur": 3, "args": {"External id": 4958, "Ev Idx": 4957}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429197, "dur": 3, "args": {"External id": 4959, "Ev Idx": 4958}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429200, "dur": 3, "args": {"External id": 4960, "Ev Idx": 4959}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429200, "dur": 3, "args": {"External id": 4961, "Ev Idx": 4960}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429203, "dur": 3, "args": {"External id": 4962, "Ev Idx": 4961}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429204, "dur": 2, "args": {"External id": 4963, "Ev Idx": 4962}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429207, "dur": 3, "args": {"External id": 4964, "Ev Idx": 4963}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429207, "dur": 2, "args": {"External id": 4965, "Ev Idx": 4964}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429210, "dur": 3, "args": {"External id": 4966, "Ev Idx": 4965}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429210, "dur": 3, "args": {"External id": 4967, "Ev Idx": 4966}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429213, "dur": 3, "args": {"External id": 4968, "Ev Idx": 4967}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429213, "dur": 3, "args": {"External id": 4969, "Ev Idx": 4968}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429216, "dur": 3, "args": {"External id": 4970, "Ev Idx": 4969}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429216, "dur": 3, "args": {"External id": 4971, "Ev Idx": 4970}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429219, "dur": 3, "args": {"External id": 4972, "Ev Idx": 4971}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429219, "dur": 3, "args": {"External id": 4973, "Ev Idx": 4972}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429222, "dur": 3, "args": {"External id": 4974, "Ev Idx": 4973}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429222, "dur": 3, "args": {"External id": 4975, "Ev Idx": 4974}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429225, "dur": 3, "args": {"External id": 4976, "Ev Idx": 4975}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429226, "dur": 2, "args": {"External id": 4977, "Ev Idx": 4976}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429229, "dur": 3, "args": {"External id": 4978, "Ev Idx": 4977}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429229, "dur": 3, "args": {"External id": 4979, "Ev Idx": 4978}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429232, "dur": 3, "args": {"External id": 4980, "Ev Idx": 4979}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429233, "dur": 2, "args": {"External id": 4981, "Ev Idx": 4980}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429236, "dur": 2, "args": {"External id": 4982, "Ev Idx": 4981}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429236, "dur": 2, "args": {"External id": 4983, "Ev Idx": 4982}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429239, "dur": 3, "args": {"External id": 4984, "Ev Idx": 4983}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429239, "dur": 2, "args": {"External id": 4985, "Ev Idx": 4984}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429242, "dur": 3, "args": {"External id": 4986, "Ev Idx": 4985}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429242, "dur": 3, "args": {"External id": 4987, "Ev Idx": 4986}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429245, "dur": 3, "args": {"External id": 4988, "Ev Idx": 4987}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429245, "dur": 3, "args": {"External id": 4989, "Ev Idx": 4988}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429248, "dur": 3, "args": {"External id": 4990, "Ev Idx": 4989}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429249, "dur": 2, "args": {"External id": 4991, "Ev Idx": 4990}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429252, "dur": 2, "args": {"External id": 4992, "Ev Idx": 4991}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429252, "dur": 2, "args": {"External id": 4993, "Ev Idx": 4992}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429254, "dur": 3, "args": {"External id": 4994, "Ev Idx": 4993}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429255, "dur": 2, "args": {"External id": 4995, "Ev Idx": 4994}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429257, "dur": 3, "args": {"External id": 4996, "Ev Idx": 4995}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429258, "dur": 2, "args": {"External id": 4997, "Ev Idx": 4996}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429260, "dur": 3, "args": {"External id": 4998, "Ev Idx": 4997}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429261, "dur": 2, "args": {"External id": 4999, "Ev Idx": 4998}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429264, "dur": 2, "args": {"External id": 5000, "Ev Idx": 4999}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429264, "dur": 2, "args": {"External id": 5001, "Ev Idx": 5000}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429267, "dur": 3, "args": {"External id": 5002, "Ev Idx": 5001}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429267, "dur": 3, "args": {"External id": 5003, "Ev Idx": 5002}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429270, "dur": 3, "args": {"External id": 5004, "Ev Idx": 5003}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429271, "dur": 2, "args": {"External id": 5005, "Ev Idx": 5004}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429274, "dur": 2, "args": {"External id": 5006, "Ev Idx": 5005}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429274, "dur": 2, "args": {"External id": 5007, "Ev Idx": 5006}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672429282, "dur": 14, "args": {"External id": 5008, "Ev Idx": 5007}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672429283, "dur": 13, "args": {"External id": 5009, "Ev Idx": 5008}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429284, "dur": 2, "args": {"External id": 5010, "Ev Idx": 5009}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672429287, "dur": 9, "args": {"External id": 5011, "Ev Idx": 5010}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672429326, "dur": 1, "args": {"External id": 5012, "Ev Idx": 5011}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672429326, "dur": 1, "args": {"External id": 5013, "Ev Idx": 5012}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429328, "dur": 4, "args": {"External id": 5014, "Ev Idx": 5013}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429329, "dur": 2, "args": {"External id": 5015, "Ev Idx": 5014}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429332, "dur": 3, "args": {"External id": 5016, "Ev Idx": 5015}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429332, "dur": 3, "args": {"External id": 5017, "Ev Idx": 5016}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429336, "dur": 3, "args": {"External id": 5018, "Ev Idx": 5017}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429336, "dur": 2, "args": {"External id": 5019, "Ev Idx": 5018}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429339, "dur": 3, "args": {"External id": 5020, "Ev Idx": 5019}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429339, "dur": 3, "args": {"External id": 5021, "Ev Idx": 5020}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429342, "dur": 3, "args": {"External id": 5022, "Ev Idx": 5021}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429342, "dur": 3, "args": {"External id": 5023, "Ev Idx": 5022}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429345, "dur": 3, "args": {"External id": 5024, "Ev Idx": 5023}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429346, "dur": 2, "args": {"External id": 5025, "Ev Idx": 5024}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429348, "dur": 3, "args": {"External id": 5026, "Ev Idx": 5025}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429349, "dur": 2, "args": {"External id": 5027, "Ev Idx": 5026}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429352, "dur": 2, "args": {"External id": 5028, "Ev Idx": 5027}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429352, "dur": 2, "args": {"External id": 5029, "Ev Idx": 5028}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429355, "dur": 3, "args": {"External id": 5030, "Ev Idx": 5029}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429355, "dur": 3, "args": {"External id": 5031, "Ev Idx": 5030}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429358, "dur": 3, "args": {"External id": 5032, "Ev Idx": 5031}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429358, "dur": 3, "args": {"External id": 5033, "Ev Idx": 5032}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429361, "dur": 3, "args": {"External id": 5034, "Ev Idx": 5033}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429362, "dur": 2, "args": {"External id": 5035, "Ev Idx": 5034}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429365, "dur": 2, "args": {"External id": 5036, "Ev Idx": 5035}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429365, "dur": 2, "args": {"External id": 5037, "Ev Idx": 5036}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429368, "dur": 3, "args": {"External id": 5038, "Ev Idx": 5037}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429368, "dur": 2, "args": {"External id": 5039, "Ev Idx": 5038}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429371, "dur": 3, "args": {"External id": 5040, "Ev Idx": 5039}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429371, "dur": 3, "args": {"External id": 5041, "Ev Idx": 5040}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429374, "dur": 3, "args": {"External id": 5042, "Ev Idx": 5041}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429374, "dur": 3, "args": {"External id": 5043, "Ev Idx": 5042}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429377, "dur": 3, "args": {"External id": 5044, "Ev Idx": 5043}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429378, "dur": 2, "args": {"External id": 5045, "Ev Idx": 5044}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429381, "dur": 2, "args": {"External id": 5046, "Ev Idx": 5045}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429381, "dur": 2, "args": {"External id": 5047, "Ev Idx": 5046}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429384, "dur": 3, "args": {"External id": 5048, "Ev Idx": 5047}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429384, "dur": 3, "args": {"External id": 5049, "Ev Idx": 5048}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429387, "dur": 3, "args": {"External id": 5050, "Ev Idx": 5049}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429387, "dur": 3, "args": {"External id": 5051, "Ev Idx": 5050}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429390, "dur": 3, "args": {"External id": 5052, "Ev Idx": 5051}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429390, "dur": 3, "args": {"External id": 5053, "Ev Idx": 5052}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429393, "dur": 3, "args": {"External id": 5054, "Ev Idx": 5053}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429393, "dur": 3, "args": {"External id": 5055, "Ev Idx": 5054}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429396, "dur": 3, "args": {"External id": 5056, "Ev Idx": 5055}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429397, "dur": 2, "args": {"External id": 5057, "Ev Idx": 5056}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429400, "dur": 3, "args": {"External id": 5058, "Ev Idx": 5057}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429400, "dur": 2, "args": {"External id": 5059, "Ev Idx": 5058}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429403, "dur": 3, "args": {"External id": 5060, "Ev Idx": 5059}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429403, "dur": 3, "args": {"External id": 5061, "Ev Idx": 5060}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429406, "dur": 3, "args": {"External id": 5062, "Ev Idx": 5061}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429406, "dur": 3, "args": {"External id": 5063, "Ev Idx": 5062}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429409, "dur": 3, "args": {"External id": 5064, "Ev Idx": 5063}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429409, "dur": 3, "args": {"External id": 5065, "Ev Idx": 5064}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429412, "dur": 3, "args": {"External id": 5066, "Ev Idx": 5065}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429413, "dur": 2, "args": {"External id": 5067, "Ev Idx": 5066}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429415, "dur": 3, "args": {"External id": 5068, "Ev Idx": 5067}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429416, "dur": 2, "args": {"External id": 5069, "Ev Idx": 5068}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429419, "dur": 3, "args": {"External id": 5070, "Ev Idx": 5069}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429419, "dur": 2, "args": {"External id": 5071, "Ev Idx": 5070}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429422, "dur": 3, "args": {"External id": 5072, "Ev Idx": 5071}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429422, "dur": 3, "args": {"External id": 5073, "Ev Idx": 5072}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429425, "dur": 3, "args": {"External id": 5074, "Ev Idx": 5073}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429425, "dur": 3, "args": {"External id": 5075, "Ev Idx": 5074}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429428, "dur": 4, "args": {"External id": 5076, "Ev Idx": 5075}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429429, "dur": 2, "args": {"External id": 5077, "Ev Idx": 5076}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429432, "dur": 3, "args": {"External id": 5078, "Ev Idx": 5077}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429432, "dur": 3, "args": {"External id": 5079, "Ev Idx": 5078}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429435, "dur": 3, "args": {"External id": 5080, "Ev Idx": 5079}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429435, "dur": 3, "args": {"External id": 5081, "Ev Idx": 5080}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429439, "dur": 3, "args": {"External id": 5082, "Ev Idx": 5081}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429439, "dur": 3, "args": {"External id": 5083, "Ev Idx": 5082}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429442, "dur": 3, "args": {"External id": 5084, "Ev Idx": 5083}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429442, "dur": 3, "args": {"External id": 5085, "Ev Idx": 5084}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429446, "dur": 3, "args": {"External id": 5086, "Ev Idx": 5085}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429446, "dur": 2, "args": {"External id": 5087, "Ev Idx": 5086}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429449, "dur": 3, "args": {"External id": 5088, "Ev Idx": 5087}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429449, "dur": 3, "args": {"External id": 5089, "Ev Idx": 5088}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429452, "dur": 3, "args": {"External id": 5090, "Ev Idx": 5089}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429452, "dur": 3, "args": {"External id": 5091, "Ev Idx": 5090}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429455, "dur": 3, "args": {"External id": 5092, "Ev Idx": 5091}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429455, "dur": 3, "args": {"External id": 5093, "Ev Idx": 5092}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429458, "dur": 3, "args": {"External id": 5094, "Ev Idx": 5093}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429459, "dur": 2, "args": {"External id": 5095, "Ev Idx": 5094}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429462, "dur": 3, "args": {"External id": 5096, "Ev Idx": 5095}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429462, "dur": 2, "args": {"External id": 5097, "Ev Idx": 5096}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429465, "dur": 3, "args": {"External id": 5098, "Ev Idx": 5097}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429465, "dur": 3, "args": {"External id": 5099, "Ev Idx": 5098}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429468, "dur": 3, "args": {"External id": 5100, "Ev Idx": 5099}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429468, "dur": 3, "args": {"External id": 5101, "Ev Idx": 5100}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429471, "dur": 3, "args": {"External id": 5102, "Ev Idx": 5101}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429471, "dur": 3, "args": {"External id": 5103, "Ev Idx": 5102}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429474, "dur": 3, "args": {"External id": 5104, "Ev Idx": 5103}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429474, "dur": 3, "args": {"External id": 5105, "Ev Idx": 5104}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429477, "dur": 3, "args": {"External id": 5106, "Ev Idx": 5105}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429478, "dur": 2, "args": {"External id": 5107, "Ev Idx": 5106}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429480, "dur": 3, "args": {"External id": 5108, "Ev Idx": 5107}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429481, "dur": 2, "args": {"External id": 5109, "Ev Idx": 5108}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429484, "dur": 2, "args": {"External id": 5110, "Ev Idx": 5109}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429484, "dur": 2, "args": {"External id": 5111, "Ev Idx": 5110}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429487, "dur": 2, "args": {"External id": 5112, "Ev Idx": 5111}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429487, "dur": 2, "args": {"External id": 5113, "Ev Idx": 5112}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429490, "dur": 2, "args": {"External id": 5114, "Ev Idx": 5113}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429490, "dur": 2, "args": {"External id": 5115, "Ev Idx": 5114}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429493, "dur": 2, "args": {"External id": 5116, "Ev Idx": 5115}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429493, "dur": 2, "args": {"External id": 5117, "Ev Idx": 5116}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429496, "dur": 2, "args": {"External id": 5118, "Ev Idx": 5117}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429496, "dur": 2, "args": {"External id": 5119, "Ev Idx": 5118}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429499, "dur": 5, "args": {"External id": 5120, "Ev Idx": 5119}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429501, "dur": 3, "args": {"External id": 5121, "Ev Idx": 5120}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429505, "dur": 3, "args": {"External id": 5122, "Ev Idx": 5121}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429505, "dur": 3, "args": {"External id": 5123, "Ev Idx": 5122}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429508, "dur": 3, "args": {"External id": 5124, "Ev Idx": 5123}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429508, "dur": 3, "args": {"External id": 5125, "Ev Idx": 5124}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429511, "dur": 3, "args": {"External id": 5126, "Ev Idx": 5125}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429512, "dur": 2, "args": {"External id": 5127, "Ev Idx": 5126}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429515, "dur": 2, "args": {"External id": 5128, "Ev Idx": 5127}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429515, "dur": 2, "args": {"External id": 5129, "Ev Idx": 5128}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429518, "dur": 3, "args": {"External id": 5130, "Ev Idx": 5129}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429518, "dur": 2, "args": {"External id": 5131, "Ev Idx": 5130}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429521, "dur": 3, "args": {"External id": 5132, "Ev Idx": 5131}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429521, "dur": 3, "args": {"External id": 5133, "Ev Idx": 5132}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429524, "dur": 3, "args": {"External id": 5134, "Ev Idx": 5133}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429525, "dur": 2, "args": {"External id": 5135, "Ev Idx": 5134}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429527, "dur": 3, "args": {"External id": 5136, "Ev Idx": 5135}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429528, "dur": 2, "args": {"External id": 5137, "Ev Idx": 5136}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429531, "dur": 2, "args": {"External id": 5138, "Ev Idx": 5137}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429531, "dur": 2, "args": {"External id": 5139, "Ev Idx": 5138}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429534, "dur": 3, "args": {"External id": 5140, "Ev Idx": 5139}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429534, "dur": 2, "args": {"External id": 5141, "Ev Idx": 5140}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429537, "dur": 3, "args": {"External id": 5142, "Ev Idx": 5141}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429537, "dur": 3, "args": {"External id": 5143, "Ev Idx": 5142}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429540, "dur": 3, "args": {"External id": 5144, "Ev Idx": 5143}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429540, "dur": 3, "args": {"External id": 5145, "Ev Idx": 5144}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429543, "dur": 3, "args": {"External id": 5146, "Ev Idx": 5145}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429544, "dur": 2, "args": {"External id": 5147, "Ev Idx": 5146}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429547, "dur": 2, "args": {"External id": 5148, "Ev Idx": 5147}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429547, "dur": 2, "args": {"External id": 5149, "Ev Idx": 5148}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429550, "dur": 3, "args": {"External id": 5150, "Ev Idx": 5149}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429550, "dur": 2, "args": {"External id": 5151, "Ev Idx": 5150}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429553, "dur": 3, "args": {"External id": 5152, "Ev Idx": 5151}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429553, "dur": 2, "args": {"External id": 5153, "Ev Idx": 5152}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429556, "dur": 3, "args": {"External id": 5154, "Ev Idx": 5153}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429556, "dur": 3, "args": {"External id": 5155, "Ev Idx": 5154}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429559, "dur": 3, "args": {"External id": 5156, "Ev Idx": 5155}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429559, "dur": 3, "args": {"External id": 5157, "Ev Idx": 5156}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429562, "dur": 3, "args": {"External id": 5158, "Ev Idx": 5157}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429562, "dur": 3, "args": {"External id": 5159, "Ev Idx": 5158}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429566, "dur": 2, "args": {"External id": 5160, "Ev Idx": 5159}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429566, "dur": 2, "args": {"External id": 5161, "Ev Idx": 5160}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429569, "dur": 3, "args": {"External id": 5162, "Ev Idx": 5161}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429569, "dur": 2, "args": {"External id": 5163, "Ev Idx": 5162}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429572, "dur": 3, "args": {"External id": 5164, "Ev Idx": 5163}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429572, "dur": 2, "args": {"External id": 5165, "Ev Idx": 5164}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429575, "dur": 3, "args": {"External id": 5166, "Ev Idx": 5165}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429575, "dur": 3, "args": {"External id": 5167, "Ev Idx": 5166}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429578, "dur": 3, "args": {"External id": 5168, "Ev Idx": 5167}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429578, "dur": 3, "args": {"External id": 5169, "Ev Idx": 5168}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429581, "dur": 3, "args": {"External id": 5170, "Ev Idx": 5169}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429581, "dur": 3, "args": {"External id": 5171, "Ev Idx": 5170}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429584, "dur": 4, "args": {"External id": 5172, "Ev Idx": 5171}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429585, "dur": 2, "args": {"External id": 5173, "Ev Idx": 5172}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429588, "dur": 3, "args": {"External id": 5174, "Ev Idx": 5173}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429588, "dur": 3, "args": {"External id": 5175, "Ev Idx": 5174}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429591, "dur": 3, "args": {"External id": 5176, "Ev Idx": 5175}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429591, "dur": 3, "args": {"External id": 5177, "Ev Idx": 5176}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429594, "dur": 3, "args": {"External id": 5178, "Ev Idx": 5177}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429595, "dur": 2, "args": {"External id": 5179, "Ev Idx": 5178}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429597, "dur": 3, "args": {"External id": 5180, "Ev Idx": 5179}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429598, "dur": 2, "args": {"External id": 5181, "Ev Idx": 5180}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429601, "dur": 2, "args": {"External id": 5182, "Ev Idx": 5181}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429601, "dur": 2, "args": {"External id": 5183, "Ev Idx": 5182}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429604, "dur": 3, "args": {"External id": 5184, "Ev Idx": 5183}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429604, "dur": 2, "args": {"External id": 5185, "Ev Idx": 5184}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429607, "dur": 3, "args": {"External id": 5186, "Ev Idx": 5185}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429607, "dur": 3, "args": {"External id": 5187, "Ev Idx": 5186}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429610, "dur": 3, "args": {"External id": 5188, "Ev Idx": 5187}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429610, "dur": 3, "args": {"External id": 5189, "Ev Idx": 5188}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429613, "dur": 3, "args": {"External id": 5190, "Ev Idx": 5189}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429613, "dur": 3, "args": {"External id": 5191, "Ev Idx": 5190}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429616, "dur": 3, "args": {"External id": 5192, "Ev Idx": 5191}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429617, "dur": 2, "args": {"External id": 5193, "Ev Idx": 5192}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429620, "dur": 2, "args": {"External id": 5194, "Ev Idx": 5193}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429620, "dur": 2, "args": {"External id": 5195, "Ev Idx": 5194}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429623, "dur": 2, "args": {"External id": 5196, "Ev Idx": 5195}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429623, "dur": 2, "args": {"External id": 5197, "Ev Idx": 5196}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429626, "dur": 3, "args": {"External id": 5198, "Ev Idx": 5197}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429626, "dur": 3, "args": {"External id": 5199, "Ev Idx": 5198}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429629, "dur": 3, "args": {"External id": 5200, "Ev Idx": 5199}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429630, "dur": 2, "args": {"External id": 5201, "Ev Idx": 5200}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429633, "dur": 3, "args": {"External id": 5202, "Ev Idx": 5201}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429633, "dur": 3, "args": {"External id": 5203, "Ev Idx": 5202}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429636, "dur": 3, "args": {"External id": 5204, "Ev Idx": 5203}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429636, "dur": 3, "args": {"External id": 5205, "Ev Idx": 5204}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429640, "dur": 3, "args": {"External id": 5206, "Ev Idx": 5205}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429640, "dur": 3, "args": {"External id": 5207, "Ev Idx": 5206}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429643, "dur": 4, "args": {"External id": 5208, "Ev Idx": 5207}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429644, "dur": 2, "args": {"External id": 5209, "Ev Idx": 5208}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429647, "dur": 3, "args": {"External id": 5210, "Ev Idx": 5209}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429647, "dur": 3, "args": {"External id": 5211, "Ev Idx": 5210}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429651, "dur": 3, "args": {"External id": 5212, "Ev Idx": 5211}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429651, "dur": 2, "args": {"External id": 5213, "Ev Idx": 5212}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429654, "dur": 3, "args": {"External id": 5214, "Ev Idx": 5213}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429654, "dur": 3, "args": {"External id": 5215, "Ev Idx": 5214}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429657, "dur": 3, "args": {"External id": 5216, "Ev Idx": 5215}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429657, "dur": 3, "args": {"External id": 5217, "Ev Idx": 5216}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429660, "dur": 3, "args": {"External id": 5218, "Ev Idx": 5217}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429660, "dur": 3, "args": {"External id": 5219, "Ev Idx": 5218}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429663, "dur": 3, "args": {"External id": 5220, "Ev Idx": 5219}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429663, "dur": 3, "args": {"External id": 5221, "Ev Idx": 5220}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429666, "dur": 3, "args": {"External id": 5222, "Ev Idx": 5221}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429667, "dur": 2, "args": {"External id": 5223, "Ev Idx": 5222}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429670, "dur": 2, "args": {"External id": 5224, "Ev Idx": 5223}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429670, "dur": 2, "args": {"External id": 5225, "Ev Idx": 5224}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429673, "dur": 3, "args": {"External id": 5226, "Ev Idx": 5225}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429673, "dur": 2, "args": {"External id": 5227, "Ev Idx": 5226}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429676, "dur": 3, "args": {"External id": 5228, "Ev Idx": 5227}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429676, "dur": 2, "args": {"External id": 5229, "Ev Idx": 5228}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429679, "dur": 3, "args": {"External id": 5230, "Ev Idx": 5229}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429679, "dur": 2, "args": {"External id": 5231, "Ev Idx": 5230}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429682, "dur": 3, "args": {"External id": 5232, "Ev Idx": 5231}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429683, "dur": 2, "args": {"External id": 5233, "Ev Idx": 5232}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429685, "dur": 4, "args": {"External id": 5234, "Ev Idx": 5233}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429686, "dur": 2, "args": {"External id": 5235, "Ev Idx": 5234}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429689, "dur": 3, "args": {"External id": 5236, "Ev Idx": 5235}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429689, "dur": 3, "args": {"External id": 5237, "Ev Idx": 5236}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429692, "dur": 3, "args": {"External id": 5238, "Ev Idx": 5237}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429692, "dur": 3, "args": {"External id": 5239, "Ev Idx": 5238}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429696, "dur": 2, "args": {"External id": 5240, "Ev Idx": 5239}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429696, "dur": 2, "args": {"External id": 5241, "Ev Idx": 5240}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429699, "dur": 3, "args": {"External id": 5242, "Ev Idx": 5241}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429699, "dur": 2, "args": {"External id": 5243, "Ev Idx": 5242}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429702, "dur": 3, "args": {"External id": 5244, "Ev Idx": 5243}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429702, "dur": 2, "args": {"External id": 5245, "Ev Idx": 5244}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429705, "dur": 3, "args": {"External id": 5246, "Ev Idx": 5245}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429705, "dur": 2, "args": {"External id": 5247, "Ev Idx": 5246}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429708, "dur": 3, "args": {"External id": 5248, "Ev Idx": 5247}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429708, "dur": 2, "args": {"External id": 5249, "Ev Idx": 5248}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429711, "dur": 3, "args": {"External id": 5250, "Ev Idx": 5249}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429711, "dur": 2, "args": {"External id": 5251, "Ev Idx": 5250}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429714, "dur": 3, "args": {"External id": 5252, "Ev Idx": 5251}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429714, "dur": 3, "args": {"External id": 5253, "Ev Idx": 5252}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429717, "dur": 3, "args": {"External id": 5254, "Ev Idx": 5253}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429718, "dur": 2, "args": {"External id": 5255, "Ev Idx": 5254}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429720, "dur": 3, "args": {"External id": 5256, "Ev Idx": 5255}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429721, "dur": 2, "args": {"External id": 5257, "Ev Idx": 5256}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429724, "dur": 2, "args": {"External id": 5258, "Ev Idx": 5257}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429724, "dur": 2, "args": {"External id": 5259, "Ev Idx": 5258}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429727, "dur": 3, "args": {"External id": 5260, "Ev Idx": 5259}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429727, "dur": 2, "args": {"External id": 5261, "Ev Idx": 5260}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429730, "dur": 3, "args": {"External id": 5262, "Ev Idx": 5261}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429730, "dur": 3, "args": {"External id": 5263, "Ev Idx": 5262}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429733, "dur": 3, "args": {"External id": 5264, "Ev Idx": 5263}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429734, "dur": 2, "args": {"External id": 5265, "Ev Idx": 5264}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429737, "dur": 2, "args": {"External id": 5266, "Ev Idx": 5265}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429737, "dur": 2, "args": {"External id": 5267, "Ev Idx": 5266}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 494, "tid": 494, "ts": 1742522672429740, "dur": 3, "args": {"External id": 5268, "Ev Idx": 5267}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429740, "dur": 2, "args": {"External id": 5269, "Ev Idx": 5268}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672429746, "dur": 12, "args": {"External id": 5270, "Ev Idx": 5269}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672429747, "dur": 11, "args": {"External id": 5271, "Ev Idx": 5270}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429748, "dur": 2, "args": {"External id": 5272, "Ev Idx": 5271}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672429750, "dur": 8, "args": {"External id": 5273, "Ev Idx": 5272}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672429779, "dur": 11, "args": {"External id": 5274, "Ev Idx": 5273}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672429779, "dur": 11, "args": {"External id": 5275, "Ev Idx": 5274}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429780, "dur": 2, "args": {"External id": 5276, "Ev Idx": 5275}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672429783, "dur": 7, "args": {"External id": 5277, "Ev Idx": 5276}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 494, "tid": 494, "ts": 1742522672429804, "dur": 33, "args": {"External id": 5278, "Ev Idx": 5277}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672429808, "dur": 1, "args": {"External id": 5279, "Ev Idx": 5278}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672429809, "dur": 17, "args": {"External id": 5280, "Ev Idx": 5279}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672429810, "dur": 16, "args": {"External id": 5281, "Ev Idx": 5280}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672429810, "dur": 3, "args": {"External id": 5282, "Ev Idx": 5281}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672429814, "dur": 11, "args": {"External id": 5283, "Ev Idx": 5282}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672429856, "dur": 4, "args": {"External id": 5284, "Ev Idx": 5283}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672429857, "dur": 1, "args": {"External id": 5285, "Ev Idx": 5284}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672429860, "dur": 8, "args": {"External id": 5286, "Ev Idx": 5285}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672429874, "dur": 0, "args": {"External id": 5287, "Ev Idx": 5286}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 494, "tid": 494, "ts": 1742522672429875, "dur": 12, "args": {"External id": 5288, "Ev Idx": 5287}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672429879, "dur": 0, "args": {"External id": 5289, "Ev Idx": 5288}}, {"ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 494, "tid": 494, "ts": 1742522672429893, "dur": 15, "args": {"External id": 5290, "Ev Idx": 5289}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 494, "tid": 494, "ts": 1742522672429894, "dur": 4, "args": {"External id": 5291, "Ev Idx": 5290}}, {"ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 494, "tid": 494, "ts": 1742522672429898, "dur": 10, "args": {"External id": 5292, "Ev Idx": 5291}}, {"ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 494, "tid": 494, "ts": 1742522672429900, "dur": 7, "args": {"External id": 5293, "Ev Idx": 5292}}, {"ph": "X", "cat": "cpu_op", "name": "aten::select", "pid": 494, "tid": 494, "ts": 1742522672429911, "dur": 1, "args": {"External id": 5294, "Ev Idx": 5293}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672429912, "dur": 0, "args": {"External id": 5295, "Ev Idx": 5294}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672429915, "dur": 10, "args": {"External id": 5296, "Ev Idx": 5295}}, {"ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 494, "tid": 494, "ts": 1742522672429943, "dur": 124, "args": {"External id": 5297, "Ev Idx": 5296}}, {"ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 494, "tid": 494, "ts": 1742522672429950, "dur": 116, "args": {"External id": 5298, "Ev Idx": 5297}}, {"ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 494, "tid": 494, "ts": 1742522672429966, "dur": 96, "args": {"External id": 5299, "Ev Idx": 5298}}, {"ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 494, "tid": 494, "ts": 1742522672430073, "dur": 3, "args": {"External id": 5300, "Ev Idx": 5299}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672430087, "dur": 26, "args": {"External id": 5301, "Ev Idx": 5300}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672430087, "dur": 26, "args": {"External id": 5302, "Ev Idx": 5301}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672430088, "dur": 6, "args": {"External id": 5303, "Ev Idx": 5302}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672430095, "dur": 17, "args": {"External id": 5304, "Ev Idx": 5303}}, {"ph": "X", "cat": "cpu_op", "name": "aten::mean", "pid": 494, "tid": 494, "ts": 1742522672430117, "dur": 21, "args": {"External id": 5305, "Ev Idx": 5304}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672430123, "dur": 1, "args": {"External id": 5306, "Ev Idx": 5305}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 494, "tid": 494, "ts": 1742522672430141, "dur": 11, "args": {"External id": 5307, "Ev Idx": 5306}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672430144, "dur": 1, "args": {"External id": 5308, "Ev Idx": 5307}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672430156, "dur": 19, "args": {"External id": 5309, "Ev Idx": 5308}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672430157, "dur": 18, "args": {"External id": 5310, "Ev Idx": 5309}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672430158, "dur": 3, "args": {"External id": 5311, "Ev Idx": 5310}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672430162, "dur": 13, "args": {"External id": 5312, "Ev Idx": 5311}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672430177, "dur": 12, "args": {"External id": 5313, "Ev Idx": 5312}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672430177, "dur": 12, "args": {"External id": 5314, "Ev Idx": 5313}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672430178, "dur": 3, "args": {"External id": 5315, "Ev Idx": 5314}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672430181, "dur": 8, "args": {"External id": 5316, "Ev Idx": 5315}}, {"ph": "X", "cat": "cpu_op", "name": "aten::max", "pid": 494, "tid": 494, "ts": 1742522672430194, "dur": 22, "args": {"External id": 5317, "Ev Idx": 5316}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672430202, "dur": 0, "args": {"External id": 5318, "Ev Idx": 5317}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672430203, "dur": 0, "args": {"External id": 5319, "Ev Idx": 5318}}, {"ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 494, "tid": 494, "ts": 1742522672430220, "dur": 10, "args": {"External id": 5320, "Ev Idx": 5319}}, {"ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 494, "tid": 494, "ts": 1742522672430223, "dur": 0, "args": {"External id": 5321, "Ev Idx": 5320}}, {"ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 494, "tid": 494, "ts": 1742522672430233, "dur": 10, "args": {"External id": 5322, "Ev Idx": 5321}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 494, "tid": 494, "ts": 1742522672430233, "dur": 10, "args": {"External id": 5323, "Ev Idx": 5322}}, {"ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 494, "tid": 494, "ts": 1742522672430234, "dur": 2, "args": {"External id": 5324, "Ev Idx": 5323}}, {"ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 494, "tid": 494, "ts": 1742522672430236, "dur": 6, "args": {"External id": 5325, "Ev Idx": 5324}}, {"ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 494, "tid": 494, "ts": 1742522672511226, "dur": 11, "args": {"External id": 5326, "Ev Idx": 5325}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 494, "tid": 494, "ts": 1742522672511233, "dur": 3, "args": {"External id": 5327, "Ev Idx": 5326}}, {"ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 494, "tid": 494, "ts": 1742522672511372, "dur": 3, "args": {"External id": 5328, "Ev Idx": 5327}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 494, "tid": 494, "ts": 1742522672511373, "dur": 1, "args": {"External id": 5329, "Ev Idx": 5328}}, {"ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 494, "tid": 494, "ts": 1742522672511377, "dur": 0, "args": {"External id": 5330, "Ev Idx": 5329}}, {"ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 494, "tid": 494, "ts": 1742522672511377, "dur": 0, "args": {"External id": 5331, "Ev Idx": 5330}}, {"ph": "f", "id": 11, "pid": 0, "tid": 7, "ts": 1742522672307859, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 11, "pid": 494, "tid": 494, "ts": 1742522672307825, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 12, "pid": 494, "tid": 494, "ts": 1742522672307861, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 25, "pid": 0, "tid": 7, "ts": 1742522672307886, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 25, "pid": 494, "tid": 494, "ts": 1742522672307880, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 26, "pid": 494, "tid": 494, "ts": 1742522672307885, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 39, "pid": 0, "tid": 7, "ts": 1742522672307903, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 39, "pid": 494, "tid": 494, "ts": 1742522672307898, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 40, "pid": 494, "tid": 494, "ts": 1742522672307902, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 53, "pid": 0, "tid": 7, "ts": 1742522672307919, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 53, "pid": 494, "tid": 494, "ts": 1742522672307914, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 54, "pid": 494, "tid": 494, "ts": 1742522672307918, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 67, "pid": 0, "tid": 7, "ts": 1742522672307940, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 67, "pid": 494, "tid": 494, "ts": 1742522672307933, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 68, "pid": 494, "tid": 494, "ts": 1742522672307938, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 81, "pid": 0, "tid": 7, "ts": 1742522672307958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 81, "pid": 494, "tid": 494, "ts": 1742522672307953, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 82, "pid": 494, "tid": 494, "ts": 1742522672307956, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 103, "pid": 0, "tid": 7, "ts": 1742522672307978, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 103, "pid": 494, "tid": 494, "ts": 1742522672307973, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 104, "pid": 494, "tid": 494, "ts": 1742522672307976, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 117, "pid": 0, "tid": 7, "ts": 1742522672308149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 117, "pid": 494, "tid": 494, "ts": 1742522672308143, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 118, "pid": 494, "tid": 494, "ts": 1742522672308147, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 131, "pid": 0, "tid": 7, "ts": 1742522672308166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 131, "pid": 494, "tid": 494, "ts": 1742522672308161, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 132, "pid": 494, "tid": 494, "ts": 1742522672308165, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 145, "pid": 0, "tid": 7, "ts": 1742522672308182, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 145, "pid": 494, "tid": 494, "ts": 1742522672308177, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 146, "pid": 494, "tid": 494, "ts": 1742522672308180, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 167, "pid": 0, "tid": 7, "ts": 1742522672308202, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 167, "pid": 494, "tid": 494, "ts": 1742522672308197, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 168, "pid": 494, "tid": 494, "ts": 1742522672308201, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 189, "pid": 0, "tid": 7, "ts": 1742522672308224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 189, "pid": 494, "tid": 494, "ts": 1742522672308219, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 190, "pid": 494, "tid": 494, "ts": 1742522672308223, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 201, "pid": 0, "tid": 7, "ts": 1742522672308680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 201, "pid": 494, "tid": 494, "ts": 1742522672308652, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 218, "pid": 0, "tid": 7, "ts": 1742522672308717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 218, "pid": 494, "tid": 494, "ts": 1742522672308709, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 235, "pid": 0, "tid": 7, "ts": 1742522672308736, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 235, "pid": 494, "tid": 494, "ts": 1742522672308729, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 252, "pid": 0, "tid": 7, "ts": 1742522672308756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 252, "pid": 494, "tid": 494, "ts": 1742522672308748, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 272, "pid": 0, "tid": 7, "ts": 1742522672308775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 272, "pid": 494, "tid": 494, "ts": 1742522672308769, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 298, "pid": 0, "tid": 7, "ts": 1742522672308846, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 298, "pid": 494, "tid": 494, "ts": 1742522672308804, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 311, "pid": 0, "tid": 7, "ts": 1742522672308875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 311, "pid": 494, "tid": 494, "ts": 1742522672308868, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 329, "pid": 0, "tid": 7, "ts": 1742522672308926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 329, "pid": 494, "tid": 494, "ts": 1742522672308915, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 330, "pid": 494, "tid": 494, "ts": 1742522672308933, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 336, "pid": 0, "tid": 7, "ts": 1742522672309123, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 336, "pid": 494, "tid": 494, "ts": 1742522672309116, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 337, "pid": 494, "tid": 494, "ts": 1742522672309121, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 347, "pid": 494, "tid": 494, "ts": 1742522672309147, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 351, "pid": 494, "tid": 494, "ts": 1742522672309151, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 354, "pid": 0, "tid": 7, "ts": 1742522672309194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 354, "pid": 494, "tid": 494, "ts": 1742522672309157, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 360, "pid": 0, "tid": 7, "ts": 1742522672309202, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 360, "pid": 494, "tid": 494, "ts": 1742522672309196, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309256, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309258, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309292, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309322, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309352, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309374, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309840, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309962, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672309968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310104, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310198, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310209, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672310829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672312759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672312764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672312770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672312905, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672312938, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313007, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313028, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313034, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313631, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313693, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313830, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313983, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672313988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314043, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314285, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314296, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314311, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314316, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314366, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314380, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314411, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314447, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314460, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314484, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672314509, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325319, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325399, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325807, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325811, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325832, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325853, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325897, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325908, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672325975, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326122, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326434, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326462, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326529, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326532, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326534, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326542, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326563, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326602, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326685, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326753, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326835, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672326904, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327147, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327231, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327235, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327241, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327244, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327247, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327276, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327317, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327331, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327336, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327352, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327354, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327375, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327400, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327615, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327960, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327985, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672327989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328007, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328011, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328043, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328064, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328579, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328664, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328674, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328736, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328771, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672328974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329007, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329028, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329299, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329355, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329359, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329366, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329379, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329397, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329401, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329420, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329446, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329456, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329475, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329477, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329499, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329516, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329524, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329602, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329684, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672329995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330007, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330028, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330081, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330123, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330148, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330170, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330186, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330202, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330249, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330707, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330804, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330807, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330810, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330835, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330838, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330857, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330894, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330899, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330914, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330916, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330936, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330953, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672330961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331108, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331141, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331404, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331415, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331437, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331503, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331513, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331534, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331552, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331555, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331574, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331611, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331630, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331648, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331738, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672331891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332134, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332219, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332223, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332233, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332236, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332264, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332285, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332308, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332339, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332386, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332446, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332835, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332851, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332856, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332909, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332913, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332919, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332953, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332971, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332975, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332993, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672332997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333153, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333281, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333629, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333633, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333643, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333674, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333693, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333747, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333771, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333941, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672333981, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334003, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334244, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334256, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334272, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334333, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334339, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334342, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334345, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334353, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334374, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334395, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334418, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334447, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334449, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334488, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334497, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334658, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334706, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672334998, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335059, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335065, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335100, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335118, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335121, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335143, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335172, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335175, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335197, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335214, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335297, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335703, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335725, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335800, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335839, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335864, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335880, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335894, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335898, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672335943, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336400, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336411, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336484, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336488, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336501, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336509, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336529, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336603, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336652, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336711, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672336854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337125, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337186, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337192, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337195, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337198, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337206, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337246, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337249, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337301, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337305, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337325, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337342, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337351, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337509, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337559, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337825, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337852, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337914, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337920, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337951, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337973, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337976, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672337999, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338014, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338028, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338052, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338070, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338138, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338279, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338613, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338620, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338623, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338633, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338676, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338709, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338730, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338850, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338940, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672338980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339001, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339344, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339352, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339373, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339413, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339487, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339940, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672339973, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340024, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340035, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340038, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340069, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340113, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340123, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340128, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340142, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340147, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340192, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340358, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340398, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340747, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340754, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340760, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340767, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340785, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340807, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340810, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340828, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340832, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340902, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672340983, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341064, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341072, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341134, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341405, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341410, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341477, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341503, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341525, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341579, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341629, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672341829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342069, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342081, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342102, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342164, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342170, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342178, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342196, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342276, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342296, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342480, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342487, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342541, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342782, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342810, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342877, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342880, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342909, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342953, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342972, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672342990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343035, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343197, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343206, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343246, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343268, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343544, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343598, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343610, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343618, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343658, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343684, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343700, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343754, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343763, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672343978, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344221, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344248, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344305, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344316, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344319, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344329, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344404, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344425, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344446, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344472, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344689, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344942, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672344964, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345016, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345040, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345106, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345121, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345135, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345137, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345184, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345329, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345665, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345721, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345730, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345741, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345762, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345801, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345805, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345838, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672345966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346046, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346055, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346117, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346458, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346460, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346508, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346530, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346534, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346545, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346563, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346587, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346613, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346675, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346754, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346762, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346802, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672346822, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347064, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347147, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347151, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347161, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347164, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347172, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347192, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347213, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347236, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347246, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347264, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347269, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347306, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347516, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347791, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347807, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347907, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347965, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347983, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672347988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348246, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348488, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348516, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348577, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348676, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348689, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348936, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672348957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349211, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349297, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349307, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349325, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349404, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349426, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349443, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349451, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349517, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349592, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349599, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349894, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349906, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349927, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349978, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672349995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350044, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350098, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350100, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350120, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350138, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350229, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350307, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350620, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350631, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350647, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350708, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350720, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350746, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350767, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350803, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350862, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672350940, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351016, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351056, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351319, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351331, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351352, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351403, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351407, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351419, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351466, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351488, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351492, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351502, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351508, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351524, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351545, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351730, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351738, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351778, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672351799, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352070, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352075, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352137, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352143, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352190, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352245, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352248, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352364, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352502, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352778, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352830, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352840, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352846, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352874, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352915, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352919, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352934, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352972, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672352997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353156, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353483, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353488, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353553, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353555, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353563, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353602, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353628, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353643, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353706, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353877, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353917, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672353938, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354182, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354209, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354215, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354269, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354276, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354279, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354308, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354311, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354329, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354354, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354385, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354407, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354898, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354993, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354996, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672354999, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355006, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355024, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355045, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355070, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355081, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355102, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355104, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355124, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355141, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355292, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355340, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355362, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355634, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355706, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355753, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355778, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355807, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355810, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355831, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355857, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672355927, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356006, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356354, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356404, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356415, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356418, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356421, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356429, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356446, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356449, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356472, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356547, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356572, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356634, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356709, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672356768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357012, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357044, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357119, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357161, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357179, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357198, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357215, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357236, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357492, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357736, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357747, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357763, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357825, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357831, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357837, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357905, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357909, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357920, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357939, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357942, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672357988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358052, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358131, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358454, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358475, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358530, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358540, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358568, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358571, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358590, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358612, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358645, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358647, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358669, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358851, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672358922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359165, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359198, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359249, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359316, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359334, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359348, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359353, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359416, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359556, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359563, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359596, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672359990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360011, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360034, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360044, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360063, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360065, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360104, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360188, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360268, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360317, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360596, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360612, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360684, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360709, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360713, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360753, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360772, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360791, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360810, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360835, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360899, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672360988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361028, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361292, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361320, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361325, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361380, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361386, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361399, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361421, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361439, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361475, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361480, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361494, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361683, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361724, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672361998, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362019, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362069, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362073, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362080, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362211, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362322, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362399, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362799, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362802, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362804, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362831, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362852, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362855, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362877, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362906, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362909, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672362957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363420, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363436, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363492, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363503, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363509, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363517, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363598, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363611, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672363887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364132, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364143, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364160, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364165, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364216, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364233, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364258, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364301, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364305, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364316, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364335, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364339, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364359, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364385, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364536, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364876, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364937, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364940, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364943, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364971, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672364993, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365011, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365045, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365069, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365248, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365288, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365552, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365563, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365580, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365637, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365647, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365681, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365700, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365703, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365721, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365725, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365736, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365741, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365754, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365778, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365804, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672365990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366252, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366264, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366285, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366336, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366340, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366352, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366399, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366403, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366421, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366426, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366436, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366441, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366457, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366459, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366504, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366664, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366713, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366977, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672366988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367073, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367104, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367108, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367127, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367148, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367205, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367231, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367296, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367384, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367425, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367720, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367771, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367782, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367785, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367787, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367816, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367837, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367855, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367859, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367913, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672367938, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368007, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368134, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368398, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368410, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368426, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368492, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368545, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368566, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368580, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368598, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368603, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368647, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368806, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672368866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369108, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369119, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369137, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369142, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369198, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369218, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369236, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369239, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369278, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369293, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369297, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369311, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369336, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369353, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369438, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369519, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369568, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369861, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369917, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369921, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369928, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369934, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369941, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369959, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369962, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369981, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672369984, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370006, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370017, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370022, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370038, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370060, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370226, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370566, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370571, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370626, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370664, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370685, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370706, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370721, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370726, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370742, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370781, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370852, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370938, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370978, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672370999, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371244, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371327, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371331, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371342, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371344, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371352, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371373, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371395, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371413, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371447, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371449, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371494, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371563, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371643, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371983, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672371988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372040, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372045, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372057, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372065, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372104, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372107, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372141, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372164, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372184, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372201, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372209, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372276, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372357, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372366, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372670, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372703, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372760, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372767, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372772, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372798, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372801, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372823, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372857, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372862, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372876, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372880, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372917, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672372987, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373137, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373380, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373413, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373823, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672373843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374098, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374114, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374119, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374172, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374182, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374188, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374196, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374213, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374216, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374235, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374256, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374293, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374330, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374339, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374407, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374787, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374799, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374877, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374890, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374897, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374915, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374937, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374940, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374973, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374978, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374991, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672374996, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375016, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375125, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375206, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375214, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375254, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375278, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375533, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375630, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375648, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375669, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375709, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375726, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375748, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375831, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375920, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375960, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672375982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376259, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376310, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376320, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376324, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376326, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376334, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376353, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376373, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376398, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376541, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376629, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376670, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376945, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672376966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377017, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377021, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377028, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377034, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377059, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377080, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377120, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377134, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377138, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377175, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377184, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377256, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377334, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377342, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377382, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377683, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377738, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377747, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377758, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377776, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377801, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377819, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377823, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377838, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377853, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377855, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377857, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377877, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377894, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377903, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672377969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378107, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378382, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378437, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378447, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378457, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378475, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378499, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378533, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378556, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378592, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378684, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672378836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379080, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379092, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379108, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379113, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379165, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379175, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379178, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379207, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379231, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379284, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379286, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379288, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379308, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379325, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379333, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379475, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379508, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379782, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379803, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379895, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379899, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379917, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379920, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379939, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379943, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379959, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379973, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379975, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379977, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672379997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380014, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380247, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380501, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380517, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380574, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380591, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380598, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380637, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380674, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380741, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380899, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380940, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672380961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381205, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381216, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381233, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381293, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381306, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381331, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381334, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381353, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381375, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381379, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381407, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381410, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381449, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381457, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381525, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381602, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381642, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381905, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381917, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381938, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672381994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382001, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382007, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382014, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382032, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382035, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382057, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382075, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382090, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382108, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382113, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382132, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382322, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382330, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382730, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382736, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382762, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382765, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382804, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382819, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382837, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382840, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382862, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382880, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672382949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383343, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383355, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383437, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383443, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383451, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383472, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383544, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383547, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383785, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672383806, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384063, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384135, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384151, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384198, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384202, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384239, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384278, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384447, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384456, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384497, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384761, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384846, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384850, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384857, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384913, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384936, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384947, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384965, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672384990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385007, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385081, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385161, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385473, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385501, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385571, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385598, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385620, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385623, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385642, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385676, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385700, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385724, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385791, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385908, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672385929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386172, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386205, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386256, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386272, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386301, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386320, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386345, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386374, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386379, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386399, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386416, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386629, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386652, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386895, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386907, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386984, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386991, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672386997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387022, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387043, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387046, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387065, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387069, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387080, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387098, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387123, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387148, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387293, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387301, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387637, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387706, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387778, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387807, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387832, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387857, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672387930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388017, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388334, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388351, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388418, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388453, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388492, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388525, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388529, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388566, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388574, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388637, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388722, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672388776, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389032, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389118, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389121, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389129, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389170, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389203, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389226, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389246, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389438, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389499, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389742, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389754, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389776, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389831, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389837, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389851, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389894, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389916, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389927, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389945, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389987, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672389995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390148, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390188, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390209, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390480, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390541, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390553, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390626, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390705, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390852, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672390922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391198, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391248, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391259, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391316, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391334, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391348, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391353, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391416, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391484, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391563, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391603, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391895, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391951, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391962, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391965, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391975, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391993, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672391996, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392014, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392017, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392035, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392073, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392110, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392118, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392192, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392268, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392276, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392329, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392572, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392670, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392722, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392741, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392760, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392798, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392823, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672392998, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393060, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393331, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393393, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393400, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393403, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393413, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393434, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393454, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393457, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393480, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393509, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393513, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393533, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393620, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672393773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394016, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394028, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394044, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394118, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394125, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394143, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394164, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394186, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394190, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394205, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394218, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394221, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394259, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394268, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394413, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394462, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394739, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394761, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394837, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394855, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394876, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394898, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394902, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394917, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394971, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672394979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395127, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395135, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395175, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395196, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395439, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395472, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395533, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395536, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395547, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395568, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395611, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395626, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395643, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395645, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395664, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395681, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395689, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395835, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672395903, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396156, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396172, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396178, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396241, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396244, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396252, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396269, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396317, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396327, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396345, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396348, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396395, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396544, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396553, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396613, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396855, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396940, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396951, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396956, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396964, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672396986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397040, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397045, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397059, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397063, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397174, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397324, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397568, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397579, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397596, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397655, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397665, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397675, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397693, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397771, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672397970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398003, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398024, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398279, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398354, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398364, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398374, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398395, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398435, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398439, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398454, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398473, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398722, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398742, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672398998, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399019, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399069, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399073, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399080, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399114, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399132, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399135, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399186, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399211, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399236, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399306, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399384, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399708, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399724, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399730, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399781, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399785, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399805, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399823, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399903, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399905, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399941, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672399950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400018, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400423, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400439, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400509, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400512, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400540, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400559, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400596, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400615, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400664, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400857, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672400879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401122, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401206, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401223, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401248, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401306, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401310, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401324, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401326, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401348, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401366, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401374, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401438, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401519, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401831, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401859, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401864, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401915, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401920, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401939, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672401982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402001, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402005, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402038, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402075, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402233, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402574, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402579, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402630, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402634, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402675, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402693, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402697, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402747, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402772, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402798, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402938, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402946, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672402999, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403269, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403326, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403331, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403340, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403343, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403351, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403393, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403416, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403426, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403447, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403449, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403566, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403652, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403685, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403947, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403959, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403975, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672403980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404035, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404045, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404055, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404072, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404120, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404131, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404151, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404153, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404354, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404415, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404669, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404685, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404741, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404765, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404805, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404830, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404859, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404861, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404864, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404908, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672404989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405074, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405107, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405128, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405400, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405405, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405454, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405458, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405499, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405540, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405544, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405559, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405577, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405598, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405615, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405623, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405778, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405787, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672405848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406119, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406124, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406186, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406190, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406192, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406264, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406279, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406296, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406317, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406334, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406343, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406407, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406800, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406811, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406899, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406909, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406927, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672406985, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407120, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407249, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407272, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407612, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407615, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407620, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407623, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407626, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 365, "pid": 0, "tid": 7, "ts": 1742522672407646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 365, "pid": 494, "tid": 494, "ts": 1742522672309203, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 366, "pid": 494, "tid": 494, "ts": 1742522672313268, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 391, "pid": 0, "tid": 7, "ts": 1742522672407657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 391, "pid": 494, "tid": 494, "ts": 1742522672313583, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 409, "pid": 0, "tid": 7, "ts": 1742522672407663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 409, "pid": 494, "tid": 494, "ts": 1742522672313725, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 422, "pid": 494, "tid": 494, "ts": 1742522672313802, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 423, "pid": 0, "tid": 7, "ts": 1742522672407677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 423, "pid": 494, "tid": 494, "ts": 1742522672313827, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 424, "pid": 494, "tid": 494, "ts": 1742522672313842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 425, "pid": 494, "tid": 494, "ts": 1742522672313844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 426, "pid": 494, "tid": 494, "ts": 1742522672313845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 427, "pid": 0, "tid": 7, "ts": 1742522672407680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 427, "pid": 494, "tid": 494, "ts": 1742522672313846, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 432, "pid": 494, "tid": 494, "ts": 1742522672314048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 433, "pid": 494, "tid": 494, "ts": 1742522672314053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 434, "pid": 494, "tid": 494, "ts": 1742522672314055, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 435, "pid": 494, "tid": 494, "ts": 1742522672314057, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 436, "pid": 494, "tid": 494, "ts": 1742522672314059, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 437, "pid": 494, "tid": 494, "ts": 1742522672314061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 438, "pid": 494, "tid": 494, "ts": 1742522672314064, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 439, "pid": 494, "tid": 494, "ts": 1742522672314066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 440, "pid": 494, "tid": 494, "ts": 1742522672314068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 441, "pid": 494, "tid": 494, "ts": 1742522672314071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 442, "pid": 494, "tid": 494, "ts": 1742522672314073, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 443, "pid": 494, "tid": 494, "ts": 1742522672314076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 444, "pid": 494, "tid": 494, "ts": 1742522672314078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 445, "pid": 494, "tid": 494, "ts": 1742522672314080, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 446, "pid": 494, "tid": 494, "ts": 1742522672314082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 447, "pid": 494, "tid": 494, "ts": 1742522672314085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 448, "pid": 494, "tid": 494, "ts": 1742522672314087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 449, "pid": 494, "tid": 494, "ts": 1742522672314089, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 450, "pid": 494, "tid": 494, "ts": 1742522672314091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 460, "pid": 0, "tid": 7, "ts": 1742522672408443, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 460, "pid": 494, "tid": 494, "ts": 1742522672314186, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 472, "pid": 0, "tid": 7, "ts": 1742522672408448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 472, "pid": 494, "tid": 494, "ts": 1742522672314210, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 484, "pid": 0, "tid": 7, "ts": 1742522672408453, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 484, "pid": 494, "tid": 494, "ts": 1742522672314222, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 496, "pid": 0, "tid": 7, "ts": 1742522672408458, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 496, "pid": 494, "tid": 494, "ts": 1742522672314234, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 508, "pid": 0, "tid": 7, "ts": 1742522672408463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 508, "pid": 494, "tid": 494, "ts": 1742522672314244, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 520, "pid": 0, "tid": 7, "ts": 1742522672408468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 520, "pid": 494, "tid": 494, "ts": 1742522672314255, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 560, "pid": 0, "tid": 7, "ts": 1742522672408474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 560, "pid": 494, "tid": 494, "ts": 1742522672314896, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 571, "pid": 0, "tid": 7, "ts": 1742522672408481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 571, "pid": 494, "tid": 494, "ts": 1742522672314952, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 591, "pid": 0, "tid": 7, "ts": 1742522672408489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 591, "pid": 494, "tid": 494, "ts": 1742522672314993, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 610, "pid": 494, "tid": 494, "ts": 1742522672315015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 611, "pid": 0, "tid": 7, "ts": 1742522672408560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 611, "pid": 494, "tid": 494, "ts": 1742522672315018, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 622, "pid": 0, "tid": 7, "ts": 1742522672408719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 622, "pid": 494, "tid": 494, "ts": 1742522672315339, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 640, "pid": 0, "tid": 7, "ts": 1742522672408724, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 640, "pid": 494, "tid": 494, "ts": 1742522672315381, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 646, "pid": 0, "tid": 7, "ts": 1742522672408727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 646, "pid": 494, "tid": 494, "ts": 1742522672315397, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 652, "pid": 0, "tid": 7, "ts": 1742522672408733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 652, "pid": 494, "tid": 494, "ts": 1742522672315405, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 657, "pid": 0, "tid": 7, "ts": 1742522672408737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 657, "pid": 494, "tid": 494, "ts": 1742522672315420, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 663, "pid": 0, "tid": 7, "ts": 1742522672408764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 663, "pid": 494, "tid": 494, "ts": 1742522672315435, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 681, "pid": 0, "tid": 7, "ts": 1742522672408774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 681, "pid": 494, "tid": 494, "ts": 1742522672315469, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 694, "pid": 0, "tid": 7, "ts": 1742522672408779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 694, "pid": 494, "tid": 494, "ts": 1742522672315509, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 704, "pid": 0, "tid": 7, "ts": 1742522672408793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 704, "pid": 494, "tid": 494, "ts": 1742522672315538, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 715, "pid": 0, "tid": 7, "ts": 1742522672408801, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 715, "pid": 494, "tid": 494, "ts": 1742522672315565, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 725, "pid": 0, "tid": 7, "ts": 1742522672408810, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 725, "pid": 494, "tid": 494, "ts": 1742522672315583, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 738, "pid": 0, "tid": 7, "ts": 1742522672408814, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 738, "pid": 494, "tid": 494, "ts": 1742522672315611, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 744, "pid": 0, "tid": 7, "ts": 1742522672408838, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 744, "pid": 494, "tid": 494, "ts": 1742522672315625, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 763, "pid": 0, "tid": 7, "ts": 1742522672408863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 763, "pid": 494, "tid": 494, "ts": 1742522672315666, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 769, "pid": 0, "tid": 7, "ts": 1742522672408891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 769, "pid": 494, "tid": 494, "ts": 1742522672315684, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 782, "pid": 0, "tid": 7, "ts": 1742522672408913, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 782, "pid": 494, "tid": 494, "ts": 1742522672315706, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 792, "pid": 0, "tid": 7, "ts": 1742522672408919, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 792, "pid": 494, "tid": 494, "ts": 1742522672315724, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 802, "pid": 0, "tid": 7, "ts": 1742522672408923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 802, "pid": 494, "tid": 494, "ts": 1742522672315754, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 812, "pid": 0, "tid": 7, "ts": 1742522672408928, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 812, "pid": 494, "tid": 494, "ts": 1742522672315779, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 822, "pid": 0, "tid": 7, "ts": 1742522672408931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 822, "pid": 494, "tid": 494, "ts": 1742522672315801, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 835, "pid": 0, "tid": 7, "ts": 1742522672408935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 835, "pid": 494, "tid": 494, "ts": 1742522672315824, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 853, "pid": 0, "tid": 7, "ts": 1742522672408939, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 853, "pid": 494, "tid": 494, "ts": 1742522672315847, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 863, "pid": 0, "tid": 7, "ts": 1742522672408943, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 863, "pid": 494, "tid": 494, "ts": 1742522672315867, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 881, "pid": 494, "tid": 494, "ts": 1742522672315897, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 883, "pid": 0, "tid": 7, "ts": 1742522672408947, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 883, "pid": 494, "tid": 494, "ts": 1742522672315901, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 893, "pid": 0, "tid": 7, "ts": 1742522672408951, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 893, "pid": 494, "tid": 494, "ts": 1742522672315921, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 903, "pid": 0, "tid": 7, "ts": 1742522672408956, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 903, "pid": 494, "tid": 494, "ts": 1742522672315939, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 913, "pid": 0, "tid": 7, "ts": 1742522672408976, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 913, "pid": 494, "tid": 494, "ts": 1742522672315952, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 923, "pid": 0, "tid": 7, "ts": 1742522672408981, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 923, "pid": 494, "tid": 494, "ts": 1742522672315962, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 930, "pid": 0, "tid": 7, "ts": 1742522672408987, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 930, "pid": 494, "tid": 494, "ts": 1742522672315980, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 946, "pid": 0, "tid": 7, "ts": 1742522672408992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 946, "pid": 494, "tid": 494, "ts": 1742522672316138, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 956, "pid": 0, "tid": 7, "ts": 1742522672409004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 956, "pid": 494, "tid": 494, "ts": 1742522672316159, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 974, "pid": 494, "tid": 494, "ts": 1742522672316185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 976, "pid": 0, "tid": 7, "ts": 1742522672409010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 976, "pid": 494, "tid": 494, "ts": 1742522672316187, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 982, "pid": 0, "tid": 7, "ts": 1742522672409018, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 982, "pid": 494, "tid": 494, "ts": 1742522672316200, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 996, "pid": 0, "tid": 7, "ts": 1742522672409023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 996, "pid": 494, "tid": 494, "ts": 1742522672316223, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1006, "pid": 0, "tid": 7, "ts": 1742522672409042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1006, "pid": 494, "tid": 494, "ts": 1742522672316240, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1024, "pid": 0, "tid": 7, "ts": 1742522672409047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1024, "pid": 494, "tid": 494, "ts": 1742522672316269, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1037, "pid": 0, "tid": 7, "ts": 1742522672409051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1037, "pid": 494, "tid": 494, "ts": 1742522672316289, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1052, "pid": 0, "tid": 7, "ts": 1742522672409056, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1052, "pid": 494, "tid": 494, "ts": 1742522672316459, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1055, "pid": 494, "tid": 494, "ts": 1742522672316469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1056, "pid": 494, "tid": 494, "ts": 1742522672316471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1057, "pid": 494, "tid": 494, "ts": 1742522672316471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1058, "pid": 494, "tid": 494, "ts": 1742522672316471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1059, "pid": 494, "tid": 494, "ts": 1742522672316472, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1060, "pid": 494, "tid": 494, "ts": 1742522672316477, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1061, "pid": 494, "tid": 494, "ts": 1742522672316491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1062, "pid": 494, "tid": 494, "ts": 1742522672316492, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1063, "pid": 494, "tid": 494, "ts": 1742522672316492, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1064, "pid": 494, "tid": 494, "ts": 1742522672316493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1065, "pid": 494, "tid": 494, "ts": 1742522672316493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1066, "pid": 494, "tid": 494, "ts": 1742522672316494, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1067, "pid": 494, "tid": 494, "ts": 1742522672316494, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1069, "pid": 0, "tid": 7, "ts": 1742522672409092, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1069, "pid": 494, "tid": 494, "ts": 1742522672316496, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1079, "pid": 0, "tid": 7, "ts": 1742522672409099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1079, "pid": 494, "tid": 494, "ts": 1742522672316512, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1092, "pid": 0, "tid": 7, "ts": 1742522672409107, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1092, "pid": 494, "tid": 494, "ts": 1742522672316530, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1110, "pid": 0, "tid": 7, "ts": 1742522672409110, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1110, "pid": 494, "tid": 494, "ts": 1742522672316546, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1122, "pid": 0, "tid": 7, "ts": 1742522672409114, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1122, "pid": 494, "tid": 494, "ts": 1742522672316565, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1134, "pid": 0, "tid": 7, "ts": 1742522672409117, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1134, "pid": 494, "tid": 494, "ts": 1742522672316585, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1145, "pid": 0, "tid": 7, "ts": 1742522672409121, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1145, "pid": 494, "tid": 494, "ts": 1742522672316605, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1160, "pid": 0, "tid": 7, "ts": 1742522672409125, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1160, "pid": 494, "tid": 494, "ts": 1742522672316621, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1173, "pid": 0, "tid": 7, "ts": 1742522672409128, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1173, "pid": 494, "tid": 494, "ts": 1742522672316630, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1191, "pid": 0, "tid": 7, "ts": 1742522672409131, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1191, "pid": 494, "tid": 494, "ts": 1742522672316642, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1202, "pid": 0, "tid": 7, "ts": 1742522672409136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1202, "pid": 494, "tid": 494, "ts": 1742522672316653, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1228, "pid": 0, "tid": 7, "ts": 1742522672409140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1228, "pid": 494, "tid": 494, "ts": 1742522672316677, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1239, "pid": 0, "tid": 7, "ts": 1742522672409145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1239, "pid": 494, "tid": 494, "ts": 1742522672316689, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1250, "pid": 0, "tid": 7, "ts": 1742522672409150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1250, "pid": 494, "tid": 494, "ts": 1742522672316700, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1273, "pid": 0, "tid": 7, "ts": 1742522672409153, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1273, "pid": 494, "tid": 494, "ts": 1742522672316718, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1291, "pid": 0, "tid": 7, "ts": 1742522672409158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1291, "pid": 494, "tid": 494, "ts": 1742522672316738, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1301, "pid": 0, "tid": 7, "ts": 1742522672409162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1301, "pid": 494, "tid": 494, "ts": 1742522672316748, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1311, "pid": 0, "tid": 7, "ts": 1742522672409167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1311, "pid": 494, "tid": 494, "ts": 1742522672316781, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1323, "pid": 0, "tid": 7, "ts": 1742522672409171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1323, "pid": 494, "tid": 494, "ts": 1742522672316799, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1338, "pid": 0, "tid": 7, "ts": 1742522672409176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1338, "pid": 494, "tid": 494, "ts": 1742522672316815, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1356, "pid": 0, "tid": 7, "ts": 1742522672409179, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1356, "pid": 494, "tid": 494, "ts": 1742522672316829, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1362, "pid": 0, "tid": 7, "ts": 1742522672409183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1362, "pid": 494, "tid": 494, "ts": 1742522672316838, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1381, "pid": 494, "tid": 494, "ts": 1742522672316854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1382, "pid": 0, "tid": 7, "ts": 1742522672409253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1382, "pid": 494, "tid": 494, "ts": 1742522672316855, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1393, "pid": 0, "tid": 7, "ts": 1742522672409411, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1393, "pid": 494, "tid": 494, "ts": 1742522672316871, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1411, "pid": 0, "tid": 7, "ts": 1742522672409417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1411, "pid": 494, "tid": 494, "ts": 1742522672316890, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1417, "pid": 0, "tid": 7, "ts": 1742522672409421, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1417, "pid": 494, "tid": 494, "ts": 1742522672316900, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1423, "pid": 0, "tid": 7, "ts": 1742522672409427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1423, "pid": 494, "tid": 494, "ts": 1742522672316906, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1428, "pid": 0, "tid": 7, "ts": 1742522672409432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1428, "pid": 494, "tid": 494, "ts": 1742522672316913, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1434, "pid": 0, "tid": 7, "ts": 1742522672409459, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1434, "pid": 494, "tid": 494, "ts": 1742522672316923, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1452, "pid": 0, "tid": 7, "ts": 1742522672409470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1452, "pid": 494, "tid": 494, "ts": 1742522672316939, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1465, "pid": 0, "tid": 7, "ts": 1742522672409473, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1465, "pid": 494, "tid": 494, "ts": 1742522672316962, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1475, "pid": 0, "tid": 7, "ts": 1742522672409486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1475, "pid": 494, "tid": 494, "ts": 1742522672316976, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1486, "pid": 0, "tid": 7, "ts": 1742522672409494, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1486, "pid": 494, "tid": 494, "ts": 1742522672316993, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1496, "pid": 0, "tid": 7, "ts": 1742522672409502, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1496, "pid": 494, "tid": 494, "ts": 1742522672317010, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1514, "pid": 494, "tid": 494, "ts": 1742522672317025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1516, "pid": 0, "tid": 7, "ts": 1742522672409506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1516, "pid": 494, "tid": 494, "ts": 1742522672317027, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1522, "pid": 0, "tid": 7, "ts": 1742522672409514, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1522, "pid": 494, "tid": 494, "ts": 1742522672317037, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1536, "pid": 0, "tid": 7, "ts": 1742522672409519, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1536, "pid": 494, "tid": 494, "ts": 1742522672317052, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1549, "pid": 0, "tid": 7, "ts": 1742522672409538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1549, "pid": 494, "tid": 494, "ts": 1742522672317071, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1564, "pid": 0, "tid": 7, "ts": 1742522672409543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1564, "pid": 494, "tid": 494, "ts": 1742522672317089, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1567, "pid": 494, "tid": 494, "ts": 1742522672317094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1568, "pid": 494, "tid": 494, "ts": 1742522672317095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1569, "pid": 494, "tid": 494, "ts": 1742522672317095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1570, "pid": 494, "tid": 494, "ts": 1742522672317095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1571, "pid": 494, "tid": 494, "ts": 1742522672317096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1572, "pid": 494, "tid": 494, "ts": 1742522672317099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1573, "pid": 494, "tid": 494, "ts": 1742522672317100, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1574, "pid": 494, "tid": 494, "ts": 1742522672317101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1575, "pid": 494, "tid": 494, "ts": 1742522672317101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1576, "pid": 494, "tid": 494, "ts": 1742522672317102, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1577, "pid": 494, "tid": 494, "ts": 1742522672317102, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1578, "pid": 494, "tid": 494, "ts": 1742522672317103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1579, "pid": 494, "tid": 494, "ts": 1742522672317103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 1581, "pid": 0, "tid": 7, "ts": 1742522672409578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1581, "pid": 494, "tid": 494, "ts": 1742522672317105, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1591, "pid": 0, "tid": 7, "ts": 1742522672409586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1591, "pid": 494, "tid": 494, "ts": 1742522672317119, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1601, "pid": 0, "tid": 7, "ts": 1742522672409590, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1601, "pid": 494, "tid": 494, "ts": 1742522672317133, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1622, "pid": 0, "tid": 7, "ts": 1742522672409596, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1622, "pid": 494, "tid": 494, "ts": 1742522672317154, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1643, "pid": 0, "tid": 7, "ts": 1742522672409602, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1643, "pid": 494, "tid": 494, "ts": 1742522672317172, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1661, "pid": 0, "tid": 7, "ts": 1742522672409607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1661, "pid": 494, "tid": 494, "ts": 1742522672317186, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1679, "pid": 0, "tid": 7, "ts": 1742522672409611, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1679, "pid": 494, "tid": 494, "ts": 1742522672317199, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1690, "pid": 0, "tid": 7, "ts": 1742522672409615, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1690, "pid": 494, "tid": 494, "ts": 1742522672317216, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1710, "pid": 0, "tid": 7, "ts": 1742522672409621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1710, "pid": 494, "tid": 494, "ts": 1742522672317338, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1723, "pid": 0, "tid": 7, "ts": 1742522672409624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1723, "pid": 494, "tid": 494, "ts": 1742522672317353, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1730, "pid": 0, "tid": 7, "ts": 1742522672409627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1730, "pid": 494, "tid": 494, "ts": 1742522672317366, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1743, "pid": 0, "tid": 7, "ts": 1742522672409632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1743, "pid": 494, "tid": 494, "ts": 1742522672317387, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1753, "pid": 0, "tid": 7, "ts": 1742522672409636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1753, "pid": 494, "tid": 494, "ts": 1742522672317409, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1763, "pid": 0, "tid": 7, "ts": 1742522672409639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1763, "pid": 494, "tid": 494, "ts": 1742522672317421, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1773, "pid": 0, "tid": 7, "ts": 1742522672409643, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1773, "pid": 494, "tid": 494, "ts": 1742522672317436, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1783, "pid": 0, "tid": 7, "ts": 1742522672409646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1783, "pid": 494, "tid": 494, "ts": 1742522672317447, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1796, "pid": 0, "tid": 7, "ts": 1742522672409649, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1796, "pid": 494, "tid": 494, "ts": 1742522672317474, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1807, "pid": 0, "tid": 7, "ts": 1742522672409654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1807, "pid": 494, "tid": 494, "ts": 1742522672317489, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1820, "pid": 0, "tid": 7, "ts": 1742522672409659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1820, "pid": 494, "tid": 494, "ts": 1742522672317504, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1838, "pid": 0, "tid": 7, "ts": 1742522672409664, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1838, "pid": 494, "tid": 494, "ts": 1742522672317519, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1848, "pid": 0, "tid": 7, "ts": 1742522672409667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1848, "pid": 494, "tid": 494, "ts": 1742522672317542, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1858, "pid": 0, "tid": 7, "ts": 1742522672409670, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1858, "pid": 494, "tid": 494, "ts": 1742522672317553, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1879, "pid": 0, "tid": 7, "ts": 1742522672409673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1879, "pid": 494, "tid": 494, "ts": 1742522672317572, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1889, "pid": 0, "tid": 7, "ts": 1742522672409677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1889, "pid": 494, "tid": 494, "ts": 1742522672317583, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1899, "pid": 0, "tid": 7, "ts": 1742522672409680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1899, "pid": 494, "tid": 494, "ts": 1742522672317592, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1909, "pid": 0, "tid": 7, "ts": 1742522672409683, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1909, "pid": 494, "tid": 494, "ts": 1742522672317601, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1922, "pid": 0, "tid": 7, "ts": 1742522672409688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1922, "pid": 494, "tid": 494, "ts": 1742522672317620, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1933, "pid": 0, "tid": 7, "ts": 1742522672409692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1933, "pid": 494, "tid": 494, "ts": 1742522672317631, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1946, "pid": 0, "tid": 7, "ts": 1742522672409696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1946, "pid": 494, "tid": 494, "ts": 1742522672317659, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1956, "pid": 0, "tid": 7, "ts": 1742522672409700, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1956, "pid": 494, "tid": 494, "ts": 1742522672317674, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1966, "pid": 0, "tid": 7, "ts": 1742522672409706, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1966, "pid": 494, "tid": 494, "ts": 1742522672317687, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1976, "pid": 0, "tid": 7, "ts": 1742522672409710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1976, "pid": 494, "tid": 494, "ts": 1742522672317709, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1986, "pid": 0, "tid": 7, "ts": 1742522672409714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1986, "pid": 494, "tid": 494, "ts": 1742522672317720, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 1998, "pid": 0, "tid": 7, "ts": 1742522672409719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 1998, "pid": 494, "tid": 494, "ts": 1742522672317752, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2015, "pid": 0, "tid": 7, "ts": 1742522672409722, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2015, "pid": 494, "tid": 494, "ts": 1742522672317798, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2028, "pid": 0, "tid": 7, "ts": 1742522672409733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2028, "pid": 494, "tid": 494, "ts": 1742522672317828, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2046, "pid": 0, "tid": 7, "ts": 1742522672409737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2046, "pid": 494, "tid": 494, "ts": 1742522672317846, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2056, "pid": 0, "tid": 7, "ts": 1742522672409740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2056, "pid": 494, "tid": 494, "ts": 1742522672317860, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2066, "pid": 0, "tid": 7, "ts": 1742522672409743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2066, "pid": 494, "tid": 494, "ts": 1742522672317870, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2087, "pid": 0, "tid": 7, "ts": 1742522672409748, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2087, "pid": 494, "tid": 494, "ts": 1742522672317894, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2100, "pid": 0, "tid": 7, "ts": 1742522672409751, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2100, "pid": 494, "tid": 494, "ts": 1742522672317910, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2110, "pid": 0, "tid": 7, "ts": 1742522672409756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2110, "pid": 494, "tid": 494, "ts": 1742522672317926, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2120, "pid": 0, "tid": 7, "ts": 1742522672409759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2120, "pid": 494, "tid": 494, "ts": 1742522672317936, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2138, "pid": 0, "tid": 7, "ts": 1742522672409763, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2138, "pid": 494, "tid": 494, "ts": 1742522672317949, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2148, "pid": 0, "tid": 7, "ts": 1742522672409766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2148, "pid": 494, "tid": 494, "ts": 1742522672317961, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2158, "pid": 0, "tid": 7, "ts": 1742522672409770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2158, "pid": 494, "tid": 494, "ts": 1742522672317969, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2195, "pid": 0, "tid": 7, "ts": 1742522672409773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2195, "pid": 494, "tid": 494, "ts": 1742522672318010, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2207, "pid": 0, "tid": 7, "ts": 1742522672409776, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2207, "pid": 494, "tid": 494, "ts": 1742522672318030, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2213, "pid": 0, "tid": 7, "ts": 1742522672409782, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2213, "pid": 494, "tid": 494, "ts": 1742522672318046, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2231, "pid": 0, "tid": 7, "ts": 1742522672409785, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2231, "pid": 494, "tid": 494, "ts": 1742522672318086, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2252, "pid": 0, "tid": 7, "ts": 1742522672409794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2252, "pid": 494, "tid": 494, "ts": 1742522672318118, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 2258, "pid": 0, "tid": 7, "ts": 1742522672409805, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 2258, "pid": 494, "tid": 494, "ts": 1742522672318135, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3199, "pid": 0, "tid": 7, "ts": 1742522672409812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3199, "pid": 494, "tid": 494, "ts": 1742522672318561, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3213, "pid": 0, "tid": 7, "ts": 1742522672409815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3213, "pid": 494, "tid": 494, "ts": 1742522672318584, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3224, "pid": 0, "tid": 7, "ts": 1742522672409818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3224, "pid": 494, "tid": 494, "ts": 1742522672318599, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3241, "pid": 0, "tid": 7, "ts": 1742522672409822, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3241, "pid": 494, "tid": 494, "ts": 1742522672318620, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3247, "pid": 0, "tid": 7, "ts": 1742522672409825, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3247, "pid": 494, "tid": 494, "ts": 1742522672318630, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3256, "pid": 0, "tid": 7, "ts": 1742522672409828, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3256, "pid": 494, "tid": 494, "ts": 1742522672318661, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3278, "pid": 0, "tid": 7, "ts": 1742522672409833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3278, "pid": 494, "tid": 494, "ts": 1742522672318715, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3294, "pid": 0, "tid": 7, "ts": 1742522672409847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3294, "pid": 494, "tid": 494, "ts": 1742522672318805, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3308, "pid": 0, "tid": 7, "ts": 1742522672409853, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3308, "pid": 494, "tid": 494, "ts": 1742522672318870, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3317, "pid": 0, "tid": 7, "ts": 1742522672409858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3317, "pid": 494, "tid": 494, "ts": 1742522672318892, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3330, "pid": 494, "tid": 494, "ts": 1742522672318949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3331, "pid": 494, "tid": 494, "ts": 1742522672318962, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3332, "pid": 494, "tid": 494, "ts": 1742522672318963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3333, "pid": 494, "tid": 494, "ts": 1742522672318964, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3334, "pid": 0, "tid": 7, "ts": 1742522672409868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3334, "pid": 494, "tid": 494, "ts": 1742522672318965, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3359, "pid": 0, "tid": 7, "ts": 1742522672409963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3359, "pid": 494, "tid": 494, "ts": 1742522672319067, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3371, "pid": 494, "tid": 494, "ts": 1742522672319234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3373, "pid": 494, "tid": 494, "ts": 1742522672319239, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3375, "pid": 494, "tid": 494, "ts": 1742522672319240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3377, "pid": 494, "tid": 494, "ts": 1742522672319240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3379, "pid": 494, "tid": 494, "ts": 1742522672319241, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3380, "pid": 0, "tid": 7, "ts": 1742522672409968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3380, "pid": 494, "tid": 494, "ts": 1742522672319245, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3389, "pid": 0, "tid": 7, "ts": 1742522672409983, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3389, "pid": 494, "tid": 494, "ts": 1742522672319320, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3412, "pid": 0, "tid": 7, "ts": 1742522672409988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3412, "pid": 494, "tid": 494, "ts": 1742522672319383, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3424, "pid": 494, "tid": 494, "ts": 1742522672319485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3426, "pid": 494, "tid": 494, "ts": 1742522672319487, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3428, "pid": 494, "tid": 494, "ts": 1742522672319487, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3430, "pid": 494, "tid": 494, "ts": 1742522672319488, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3432, "pid": 494, "tid": 494, "ts": 1742522672319489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3433, "pid": 0, "tid": 7, "ts": 1742522672409990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3433, "pid": 494, "tid": 494, "ts": 1742522672319493, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3445, "pid": 494, "tid": 494, "ts": 1742522672319572, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3446, "pid": 494, "tid": 494, "ts": 1742522672319576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3447, "pid": 0, "tid": 7, "ts": 1742522672410018, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3447, "pid": 494, "tid": 494, "ts": 1742522672319578, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3454, "pid": 0, "tid": 7, "ts": 1742522672410051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3454, "pid": 494, "tid": 494, "ts": 1742522672319610, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3475, "pid": 0, "tid": 7, "ts": 1742522672410063, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3475, "pid": 494, "tid": 494, "ts": 1742522672319666, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3510, "pid": 494, "tid": 494, "ts": 1742522672319716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3511, "pid": 0, "tid": 7, "ts": 1742522672410078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3511, "pid": 494, "tid": 494, "ts": 1742522672319718, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3513, "pid": 0, "tid": 7, "ts": 1742522672410549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3513, "pid": 494, "tid": 494, "ts": 1742522672319723, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3525, "pid": 494, "tid": 494, "ts": 1742522672319766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3526, "pid": 494, "tid": 494, "ts": 1742522672319768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3527, "pid": 0, "tid": 7, "ts": 1742522672410577, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3527, "pid": 494, "tid": 494, "ts": 1742522672319770, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3544, "pid": 0, "tid": 7, "ts": 1742522672410606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3544, "pid": 494, "tid": 494, "ts": 1742522672319804, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3556, "pid": 494, "tid": 494, "ts": 1742522672319899, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3558, "pid": 494, "tid": 494, "ts": 1742522672319900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3560, "pid": 494, "tid": 494, "ts": 1742522672319901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3562, "pid": 494, "tid": 494, "ts": 1742522672319901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3564, "pid": 494, "tid": 494, "ts": 1742522672319902, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3565, "pid": 0, "tid": 7, "ts": 1742522672410616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3565, "pid": 494, "tid": 494, "ts": 1742522672319905, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3587, "pid": 0, "tid": 7, "ts": 1742522672410673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3587, "pid": 494, "tid": 494, "ts": 1742522672320003, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3597, "pid": 494, "tid": 494, "ts": 1742522672320061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3598, "pid": 494, "tid": 494, "ts": 1742522672320065, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3599, "pid": 0, "tid": 7, "ts": 1742522672410681, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3599, "pid": 494, "tid": 494, "ts": 1742522672320066, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3601, "pid": 0, "tid": 7, "ts": 1742522672410691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3601, "pid": 494, "tid": 494, "ts": 1742522672320075, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3610, "pid": 494, "tid": 494, "ts": 1742522672320105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3612, "pid": 0, "tid": 7, "ts": 1742522672410697, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3612, "pid": 494, "tid": 494, "ts": 1742522672320109, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3637, "pid": 0, "tid": 7, "ts": 1742522672410702, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3637, "pid": 494, "tid": 494, "ts": 1742522672320162, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 3653, "pid": 494, "tid": 494, "ts": 1742522672320231, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3694, "pid": 0, "tid": 16, "ts": 1742522672410714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3694, "pid": 494, "tid": 494, "ts": 1742522672320258, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 3703, "pid": 494, "tid": 494, "ts": 1742522672320313, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3747, "pid": 0, "tid": 7, "ts": 1742522672410936, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3747, "pid": 494, "tid": 494, "ts": 1742522672320347, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3757, "pid": 494, "tid": 494, "ts": 1742522672320465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3759, "pid": 494, "tid": 494, "ts": 1742522672320467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3761, "pid": 494, "tid": 494, "ts": 1742522672320467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3763, "pid": 494, "tid": 494, "ts": 1742522672320468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3765, "pid": 494, "tid": 494, "ts": 1742522672320468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3766, "pid": 0, "tid": 7, "ts": 1742522672410953, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3766, "pid": 494, "tid": 494, "ts": 1742522672320472, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3782, "pid": 0, "tid": 7, "ts": 1742522672411037, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3782, "pid": 494, "tid": 494, "ts": 1742522672320510, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3793, "pid": 494, "tid": 494, "ts": 1742522672320593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3795, "pid": 494, "tid": 494, "ts": 1742522672320594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3797, "pid": 494, "tid": 494, "ts": 1742522672320594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3799, "pid": 494, "tid": 494, "ts": 1742522672320595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3801, "pid": 494, "tid": 494, "ts": 1742522672320596, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3802, "pid": 0, "tid": 7, "ts": 1742522672411047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3802, "pid": 494, "tid": 494, "ts": 1742522672320599, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3803, "pid": 0, "tid": 7, "ts": 1742522672411091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3803, "pid": 494, "tid": 494, "ts": 1742522672320612, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3812, "pid": 0, "tid": 7, "ts": 1742522672411127, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3812, "pid": 494, "tid": 494, "ts": 1742522672320629, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3828, "pid": 0, "tid": 7, "ts": 1742522672411151, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3828, "pid": 494, "tid": 494, "ts": 1742522672320668, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3840, "pid": 494, "tid": 494, "ts": 1742522672320755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3842, "pid": 494, "tid": 494, "ts": 1742522672320757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3844, "pid": 494, "tid": 494, "ts": 1742522672320757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3846, "pid": 494, "tid": 494, "ts": 1742522672320758, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3848, "pid": 494, "tid": 494, "ts": 1742522672320758, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3849, "pid": 0, "tid": 7, "ts": 1742522672411156, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3849, "pid": 494, "tid": 494, "ts": 1742522672320762, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3865, "pid": 0, "tid": 7, "ts": 1742522672411181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3865, "pid": 494, "tid": 494, "ts": 1742522672320794, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3877, "pid": 494, "tid": 494, "ts": 1742522672320881, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3879, "pid": 494, "tid": 494, "ts": 1742522672320882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3881, "pid": 494, "tid": 494, "ts": 1742522672320882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3883, "pid": 494, "tid": 494, "ts": 1742522672320883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3885, "pid": 494, "tid": 494, "ts": 1742522672320883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3886, "pid": 0, "tid": 7, "ts": 1742522672411185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3886, "pid": 494, "tid": 494, "ts": 1742522672320886, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3896, "pid": 494, "tid": 494, "ts": 1742522672320966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3898, "pid": 494, "tid": 494, "ts": 1742522672320966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3900, "pid": 494, "tid": 494, "ts": 1742522672320967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3902, "pid": 494, "tid": 494, "ts": 1742522672320967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3904, "pid": 494, "tid": 494, "ts": 1742522672320968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3905, "pid": 0, "tid": 7, "ts": 1742522672411198, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3905, "pid": 494, "tid": 494, "ts": 1742522672320969, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3921, "pid": 0, "tid": 7, "ts": 1742522672411277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3921, "pid": 494, "tid": 494, "ts": 1742522672320993, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3932, "pid": 494, "tid": 494, "ts": 1742522672321057, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3934, "pid": 494, "tid": 494, "ts": 1742522672321058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3936, "pid": 494, "tid": 494, "ts": 1742522672321058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3938, "pid": 494, "tid": 494, "ts": 1742522672321059, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3940, "pid": 494, "tid": 494, "ts": 1742522672321060, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 3941, "pid": 0, "tid": 7, "ts": 1742522672411285, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3941, "pid": 494, "tid": 494, "ts": 1742522672321061, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3942, "pid": 0, "tid": 7, "ts": 1742522672411320, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3942, "pid": 494, "tid": 494, "ts": 1742522672321071, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 3951, "pid": 494, "tid": 494, "ts": 1742522672321084, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3964, "pid": 0, "tid": 16, "ts": 1742522672411500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3964, "pid": 494, "tid": 494, "ts": 1742522672321094, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 3977, "pid": 494, "tid": 494, "ts": 1742522672321113, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3986, "pid": 0, "tid": 7, "ts": 1742522672411857, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3986, "pid": 494, "tid": 494, "ts": 1742522672321127, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 3996, "pid": 0, "tid": 7, "ts": 1742522672411864, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 3996, "pid": 494, "tid": 494, "ts": 1742522672321156, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4025, "pid": 0, "tid": 7, "ts": 1742522672411869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4025, "pid": 494, "tid": 494, "ts": 1742522672321257, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4034, "pid": 0, "tid": 7, "ts": 1742522672411876, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4034, "pid": 494, "tid": 494, "ts": 1742522672321298, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4049, "pid": 494, "tid": 494, "ts": 1742522672321333, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4050, "pid": 0, "tid": 7, "ts": 1742522672411885, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4050, "pid": 494, "tid": 494, "ts": 1742522672321344, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4051, "pid": 0, "tid": 7, "ts": 1742522672411888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4051, "pid": 494, "tid": 494, "ts": 1742522672321351, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4052, "pid": 494, "tid": 494, "ts": 1742522672321355, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4053, "pid": 494, "tid": 494, "ts": 1742522672321356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4054, "pid": 494, "tid": 494, "ts": 1742522672321357, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4055, "pid": 0, "tid": 7, "ts": 1742522672411891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4055, "pid": 494, "tid": 494, "ts": 1742522672321358, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4062, "pid": 0, "tid": 7, "ts": 1742522672412562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4062, "pid": 494, "tid": 494, "ts": 1742522672321528, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4081, "pid": 494, "tid": 494, "ts": 1742522672321547, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4082, "pid": 0, "tid": 7, "ts": 1742522672412630, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4082, "pid": 494, "tid": 494, "ts": 1742522672321549, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4093, "pid": 0, "tid": 7, "ts": 1742522672412786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4093, "pid": 494, "tid": 494, "ts": 1742522672321570, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4111, "pid": 0, "tid": 7, "ts": 1742522672412790, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4111, "pid": 494, "tid": 494, "ts": 1742522672321591, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4117, "pid": 0, "tid": 7, "ts": 1742522672412793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4117, "pid": 494, "tid": 494, "ts": 1742522672321602, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4123, "pid": 0, "tid": 7, "ts": 1742522672412798, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4123, "pid": 494, "tid": 494, "ts": 1742522672321609, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4128, "pid": 0, "tid": 7, "ts": 1742522672412802, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4128, "pid": 494, "tid": 494, "ts": 1742522672321617, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4134, "pid": 0, "tid": 7, "ts": 1742522672412827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4134, "pid": 494, "tid": 494, "ts": 1742522672321627, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4152, "pid": 0, "tid": 7, "ts": 1742522672412837, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4152, "pid": 494, "tid": 494, "ts": 1742522672321643, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4165, "pid": 0, "tid": 7, "ts": 1742522672412840, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4165, "pid": 494, "tid": 494, "ts": 1742522672321669, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4175, "pid": 0, "tid": 7, "ts": 1742522672412852, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4175, "pid": 494, "tid": 494, "ts": 1742522672321684, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4186, "pid": 0, "tid": 7, "ts": 1742522672412858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4186, "pid": 494, "tid": 494, "ts": 1742522672321700, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4196, "pid": 0, "tid": 7, "ts": 1742522672412865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4196, "pid": 494, "tid": 494, "ts": 1742522672321712, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4202, "pid": 0, "tid": 7, "ts": 1742522672412869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4202, "pid": 494, "tid": 494, "ts": 1742522672321727, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4222, "pid": 494, "tid": 494, "ts": 1742522672321747, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4224, "pid": 0, "tid": 7, "ts": 1742522672412873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4224, "pid": 494, "tid": 494, "ts": 1742522672321749, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4230, "pid": 0, "tid": 7, "ts": 1742522672412879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4230, "pid": 494, "tid": 494, "ts": 1742522672321760, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4244, "pid": 0, "tid": 7, "ts": 1742522672412883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4244, "pid": 494, "tid": 494, "ts": 1742522672321776, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4257, "pid": 0, "tid": 7, "ts": 1742522672412901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4257, "pid": 494, "tid": 494, "ts": 1742522672321794, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4263, "pid": 0, "tid": 7, "ts": 1742522672412904, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4263, "pid": 494, "tid": 494, "ts": 1742522672321810, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4271, "pid": 0, "tid": 7, "ts": 1742522672412906, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4271, "pid": 494, "tid": 494, "ts": 1742522672321823, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4279, "pid": 0, "tid": 7, "ts": 1742522672412911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4279, "pid": 494, "tid": 494, "ts": 1742522672321833, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4289, "pid": 0, "tid": 7, "ts": 1742522672412915, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4289, "pid": 494, "tid": 494, "ts": 1742522672321879, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4301, "pid": 0, "tid": 7, "ts": 1742522672412918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4301, "pid": 494, "tid": 494, "ts": 1742522672321899, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4307, "pid": 494, "tid": 494, "ts": 1742522672321915, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4311, "pid": 0, "tid": 7, "ts": 1742522672412924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4311, "pid": 494, "tid": 494, "ts": 1742522672321921, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4314, "pid": 494, "tid": 494, "ts": 1742522672321933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4318, "pid": 0, "tid": 7, "ts": 1742522672412928, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4318, "pid": 494, "tid": 494, "ts": 1742522672321936, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4321, "pid": 494, "tid": 494, "ts": 1742522672321944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4325, "pid": 0, "tid": 7, "ts": 1742522672412933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4325, "pid": 494, "tid": 494, "ts": 1742522672321947, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4328, "pid": 494, "tid": 494, "ts": 1742522672321955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4332, "pid": 0, "tid": 7, "ts": 1742522672412937, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4332, "pid": 494, "tid": 494, "ts": 1742522672321957, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4335, "pid": 494, "tid": 494, "ts": 1742522672321965, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4339, "pid": 0, "tid": 7, "ts": 1742522672412942, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4339, "pid": 494, "tid": 494, "ts": 1742522672321967, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4352, "pid": 0, "tid": 7, "ts": 1742522672412946, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4352, "pid": 494, "tid": 494, "ts": 1742522672321983, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4356, "pid": 494, "tid": 494, "ts": 1742522672321995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4360, "pid": 0, "tid": 7, "ts": 1742522672412957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4360, "pid": 494, "tid": 494, "ts": 1742522672321998, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4363, "pid": 494, "tid": 494, "ts": 1742522672322006, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4367, "pid": 0, "tid": 7, "ts": 1742522672412961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4367, "pid": 494, "tid": 494, "ts": 1742522672322008, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4370, "pid": 494, "tid": 494, "ts": 1742522672322015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4374, "pid": 0, "tid": 7, "ts": 1742522672412965, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4374, "pid": 494, "tid": 494, "ts": 1742522672322018, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4377, "pid": 494, "tid": 494, "ts": 1742522672322024, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4381, "pid": 0, "tid": 7, "ts": 1742522672412970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4381, "pid": 494, "tid": 494, "ts": 1742522672322027, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4393, "pid": 0, "tid": 7, "ts": 1742522672412974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4393, "pid": 494, "tid": 494, "ts": 1742522672322041, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 4398, "pid": 494, "tid": 494, "ts": 1742522672322049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 4402, "pid": 0, "tid": 7, "ts": 1742522672412981, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 4402, "pid": 494, "tid": 494, "ts": 1742522672322052, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 5315, "pid": 0, "tid": 7, "ts": 1742522672412986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 5315, "pid": 494, "tid": 494, "ts": 1742522672322510, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 5319, "pid": 0, "tid": 7, "ts": 1742522672412988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 5319, "pid": 494, "tid": 494, "ts": 1742522672322520, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6233, "pid": 0, "tid": 7, "ts": 1742522672412992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6233, "pid": 494, "tid": 494, "ts": 1742522672322978, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6237, "pid": 0, "tid": 7, "ts": 1742522672412996, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6237, "pid": 494, "tid": 494, "ts": 1742522672322987, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6241, "pid": 494, "tid": 494, "ts": 1742522672323013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 6245, "pid": 0, "tid": 7, "ts": 1742522672413006, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6245, "pid": 494, "tid": 494, "ts": 1742522672323017, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6262, "pid": 0, "tid": 7, "ts": 1742522672413010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6262, "pid": 494, "tid": 494, "ts": 1742522672323051, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6271, "pid": 0, "tid": 7, "ts": 1742522672413013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6271, "pid": 494, "tid": 494, "ts": 1742522672323064, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6278, "pid": 0, "tid": 7, "ts": 1742522672413018, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6278, "pid": 494, "tid": 494, "ts": 1742522672323096, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6295, "pid": 0, "tid": 7, "ts": 1742522672413021, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6295, "pid": 494, "tid": 494, "ts": 1742522672323115, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6309, "pid": 0, "tid": 7, "ts": 1742522672413026, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6309, "pid": 494, "tid": 494, "ts": 1742522672323135, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6320, "pid": 0, "tid": 7, "ts": 1742522672413028, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6320, "pid": 494, "tid": 494, "ts": 1742522672323148, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6327, "pid": 494, "tid": 494, "ts": 1742522672323220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6333, "pid": 494, "tid": 494, "ts": 1742522672323239, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6337, "pid": 494, "tid": 494, "ts": 1742522672323283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6342, "pid": 494, "tid": 494, "ts": 1742522672323306, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6344, "pid": 494, "tid": 494, "ts": 1742522672323310, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 6345, "pid": 0, "tid": 13, "ts": 1742522672413032, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6348, "pid": 494, "tid": 494, "ts": 1742522672323361, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 6367, "pid": 494, "tid": 494, "ts": 1742522672323390, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 6374, "pid": 494, "tid": 494, "ts": 1742522672323421, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6390, "pid": 0, "tid": 7, "ts": 1742522672413202, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6390, "pid": 494, "tid": 494, "ts": 1742522672323453, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6404, "pid": 0, "tid": 7, "ts": 1742522672413206, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6404, "pid": 494, "tid": 494, "ts": 1742522672323484, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6418, "pid": 0, "tid": 7, "ts": 1742522672413216, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6418, "pid": 494, "tid": 494, "ts": 1742522672323502, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6422, "pid": 494, "tid": 494, "ts": 1742522672323513, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 6426, "pid": 0, "tid": 7, "ts": 1742522672413228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6426, "pid": 494, "tid": 494, "ts": 1742522672323518, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6438, "pid": 0, "tid": 7, "ts": 1742522672413234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6438, "pid": 494, "tid": 494, "ts": 1742522672323536, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6456, "pid": 0, "tid": 7, "ts": 1742522672413237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6456, "pid": 494, "tid": 494, "ts": 1742522672323562, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6470, "pid": 0, "tid": 7, "ts": 1742522672413256, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6470, "pid": 494, "tid": 494, "ts": 1742522672323579, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 6474, "pid": 494, "tid": 494, "ts": 1742522672323586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 6478, "pid": 0, "tid": 7, "ts": 1742522672413261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 6478, "pid": 494, "tid": 494, "ts": 1742522672323590, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 6489, "pid": 494, "tid": 494, "ts": 1742522672323649, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672414895, "dur": 5, "args": {"External id": 6555, "cbid": 138, "correlation": 6555}}, {"ph": "f", "id": 6555, "pid": 494, "tid": 494, "ts": 1742522672414895, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672414966, "dur": 0, "args": {"External id": 6562, "device": 0, "context": 1, "stream": 7, "correlation": 6562, "bytes": 8, "memory bandwidth (GB/s)": 0.010869565217391304}}, {"ph": "f", "id": 6562, "pid": 0, "tid": 7, "ts": 1742522672414966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672414936, "dur": 30, "args": {"External id": 6562, "cbid": 41, "correlation": 6562}}, {"ph": "s", "id": 6562, "pid": 494, "tid": 494, "ts": 1742522672414936, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672414967, "dur": 6, "args": {"External id": 6563, "cbid": 131, "correlation": 6563}}, {"ph": "s", "id": 6563, "pid": 494, "tid": 494, "ts": 1742522672414967, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamIsCapturing", "pid": 494, "tid": 494, "ts": 1742522672415097, "dur": 2, "args": {"External id": 6567, "cbid": 317, "correlation": 6567}}, {"ph": "f", "id": 6567, "pid": 494, "tid": 494, "ts": 1742522672415097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672415121, "dur": 3, "args": {"External id": 6573, "cbid": 147, "correlation": 6573}}, {"ph": "s", "id": 6573, "pid": 494, "tid": 494, "ts": 1742522672415121, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamGetCaptureInfo_v2", "pid": 494, "tid": 494, "ts": 1742522672415161, "dur": 2, "args": {"External id": 6577, "cbid": 409, "correlation": 6577}}, {"ph": "f", "id": 6577, "pid": 494, "tid": 494, "ts": 1742522672415161, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672415183, "dur": 0, "args": {"External id": 6582, "cbid": 147, "correlation": 6582}}, {"ph": "s", "id": 6582, "pid": 494, "tid": 494, "ts": 1742522672415183, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaGetFuncBySymbol", "pid": 494, "tid": 494, "ts": 1742522672415186, "dur": 1, "args": {"External id": 6584, "cbid": 336, "correlation": 6584}}, {"ph": "f", "id": 6584, "pid": 494, "tid": 494, "ts": 1742522672415186, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "ncclDevKernel_AllReduce_Sum_u64_TREE_LL(ncclDevKernelArgsStorage<4096ul>)", "pid": 0, "tid": 13, "ts": 1742522672415259, "dur": 245, "args": {"External id": 6585, "queued": 0, "device": 0, "context": 1, "stream": 13, "correlation": 6585, "registers per thread": 96, "shared memory": 103776, "blocks per SM": 0.007575758, "warps per SM": 0.15151516, "grid": [1, 1, 1], "block": [640, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 6585, "pid": 0, "tid": 13, "ts": 1742522672415259, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672415268, "dur": 0, "args": {"External id": 6588, "cbid": 147, "correlation": 6588}}, {"ph": "s", "id": 6588, "pid": 494, "tid": 494, "ts": 1742522672415268, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672415302, "dur": 4, "args": {"External id": 6607, "cbid": 147, "correlation": 6607}}, {"ph": "s", "id": 6607, "pid": 494, "tid": 494, "ts": 1742522672415302, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672415347, "dur": 1, "args": {"External id": 6614, "cbid": 147, "correlation": 6614}}, {"ph": "s", "id": 6614, "pid": 494, "tid": 494, "ts": 1742522672415347, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pageable)", "pid": 0, "tid": 7, "ts": 1742522672415507, "dur": 2, "args": {"External id": 6623, "device": 0, "context": 1, "stream": 7, "correlation": 6623, "bytes": 8, "memory bandwidth (GB/s)": 0.003289473684210526}}, {"ph": "f", "id": 6623, "pid": 0, "tid": 7, "ts": 1742522672415507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672415372, "dur": 141, "args": {"External id": 6623, "cbid": 41, "correlation": 6623}}, {"ph": "s", "id": 6623, "pid": 494, "tid": 494, "ts": 1742522672415372, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672415514, "dur": 5, "args": {"External id": 6624, "cbid": 131, "correlation": 6624}}, {"ph": "s", "id": 6624, "pid": 494, "tid": 494, "ts": 1742522672415514, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672415570, "dur": 2, "args": {"External id": 6635, "cbid": 138, "correlation": 6635}}, {"ph": "f", "id": 6635, "pid": 494, "tid": 494, "ts": 1742522672415570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<int>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<int>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672415627, "dur": 1, "args": {"External id": 6642, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 6642, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 6642, "pid": 0, "tid": 7, "ts": 1742522672415627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672415591, "dur": 34, "args": {"External id": 6642, "cbid": 211, "correlation": 6642}}, {"ph": "s", "id": 6642, "pid": 494, "tid": 494, "ts": 1742522672415591, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamIsCapturing", "pid": 494, "tid": 494, "ts": 1742522672415636, "dur": 0, "args": {"External id": 6647, "cbid": 317, "correlation": 6647}}, {"ph": "f", "id": 6647, "pid": 494, "tid": 494, "ts": 1742522672415636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672415643, "dur": 1, "args": {"External id": 6653, "cbid": 147, "correlation": 6653}}, {"ph": "s", "id": 6653, "pid": 494, "tid": 494, "ts": 1742522672415643, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamGetCaptureInfo_v2", "pid": 494, "tid": 494, "ts": 1742522672415651, "dur": 0, "args": {"External id": 6657, "cbid": 409, "correlation": 6657}}, {"ph": "f", "id": 6657, "pid": 494, "tid": 494, "ts": 1742522672415651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672415657, "dur": 0, "args": {"External id": 6662, "cbid": 147, "correlation": 6662}}, {"ph": "s", "id": 6662, "pid": 494, "tid": 494, "ts": 1742522672415657, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaGetFuncBySymbol", "pid": 494, "tid": 494, "ts": 1742522672415659, "dur": 0, "args": {"External id": 6664, "cbid": 336, "correlation": 6664}}, {"ph": "f", "id": 6664, "pid": 494, "tid": 494, "ts": 1742522672415659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "ncclDevKernel_AllReduce_Sum_u32_TREE_LL(ncclDevKernelArgsStorage<4096ul>)", "pid": 0, "tid": 13, "ts": 1742522672415709, "dur": 106, "args": {"External id": 6665, "queued": 0, "device": 0, "context": 1, "stream": 13, "correlation": 6665, "registers per thread": 96, "shared memory": 103776, "blocks per SM": 0.007575758, "warps per SM": 0.15151516, "grid": [1, 1, 1], "block": [640, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 6665, "pid": 0, "tid": 13, "ts": 1742522672415709, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672415714, "dur": 0, "args": {"External id": 6668, "cbid": 147, "correlation": 6668}}, {"ph": "s", "id": 6668, "pid": 494, "tid": 494, "ts": 1742522672415714, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672415730, "dur": 2, "args": {"External id": 6687, "cbid": 147, "correlation": 6687}}, {"ph": "s", "id": 6687, "pid": 494, "tid": 494, "ts": 1742522672415730, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672415744, "dur": 1, "args": {"External id": 6694, "cbid": 147, "correlation": 6694}}, {"ph": "s", "id": 6694, "pid": 494, "tid": 494, "ts": 1742522672415744, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pageable)", "pid": 0, "tid": 7, "ts": 1742522672415822, "dur": 2, "args": {"External id": 6703, "device": 0, "context": 1, "stream": 7, "correlation": 6703, "bytes": 4, "memory bandwidth (GB/s)": 0.001893939393939394}}, {"ph": "f", "id": 6703, "pid": 0, "tid": 7, "ts": 1742522672415822, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672415754, "dur": 74, "args": {"External id": 6703, "cbid": 41, "correlation": 6703}}, {"ph": "s", "id": 6703, "pid": 494, "tid": 494, "ts": 1742522672415754, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672415829, "dur": 4, "args": {"External id": 6704, "cbid": 131, "correlation": 6704}}, {"ph": "s", "id": 6704, "pid": 494, "tid": 494, "ts": 1742522672415829, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672416163, "dur": 3, "args": {"External id": 6716, "cbid": 138, "correlation": 6716}}, {"ph": "f", "id": 6716, "pid": 494, "tid": 494, "ts": 1742522672416163, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416187, "dur": 0, "args": {"External id": 6723, "device": 0, "context": 1, "stream": 7, "correlation": 6723, "bytes": 2048, "memory bandwidth (GB/s)": 2.56}}, {"ph": "f", "id": 6723, "pid": 0, "tid": 7, "ts": 1742522672416187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416177, "dur": 8, "args": {"External id": 6723, "cbid": 41, "correlation": 6723}}, {"ph": "s", "id": 6723, "pid": 494, "tid": 494, "ts": 1742522672416177, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416186, "dur": 5, "args": {"External id": 6724, "cbid": 131, "correlation": 6724}}, {"ph": "s", "id": 6724, "pid": 494, "tid": 494, "ts": 1742522672416186, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416205, "dur": 0, "args": {"External id": 6737, "device": 0, "context": 1, "stream": 7, "correlation": 6737, "bytes": 2048, "memory bandwidth (GB/s)": 2.4615384615384617}}, {"ph": "f", "id": 6737, "pid": 0, "tid": 7, "ts": 1742522672416205, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416199, "dur": 3, "args": {"External id": 6737, "cbid": 41, "correlation": 6737}}, {"ph": "s", "id": 6737, "pid": 494, "tid": 494, "ts": 1742522672416199, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416203, "dur": 5, "args": {"External id": 6738, "cbid": 131, "correlation": 6738}}, {"ph": "s", "id": 6738, "pid": 494, "tid": 494, "ts": 1742522672416203, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416221, "dur": 0, "args": {"External id": 6751, "device": 0, "context": 1, "stream": 7, "correlation": 6751, "bytes": 2048, "memory bandwidth (GB/s)": 2.1333333333333333}}, {"ph": "f", "id": 6751, "pid": 0, "tid": 7, "ts": 1742522672416221, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416216, "dur": 3, "args": {"External id": 6751, "cbid": 41, "correlation": 6751}}, {"ph": "s", "id": 6751, "pid": 494, "tid": 494, "ts": 1742522672416216, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416219, "dur": 5, "args": {"External id": 6752, "cbid": 131, "correlation": 6752}}, {"ph": "s", "id": 6752, "pid": 494, "tid": 494, "ts": 1742522672416219, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416237, "dur": 0, "args": {"External id": 6765, "device": 0, "context": 1, "stream": 7, "correlation": 6765, "bytes": 512, "memory bandwidth (GB/s)": 0.6965986394557823}}, {"ph": "f", "id": 6765, "pid": 0, "tid": 7, "ts": 1742522672416237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416232, "dur": 2, "args": {"External id": 6765, "cbid": 41, "correlation": 6765}}, {"ph": "s", "id": 6765, "pid": 494, "tid": 494, "ts": 1742522672416232, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416235, "dur": 5, "args": {"External id": 6766, "cbid": 131, "correlation": 6766}}, {"ph": "s", "id": 6766, "pid": 494, "tid": 494, "ts": 1742522672416235, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416256, "dur": 2, "args": {"External id": 6779, "device": 0, "context": 1, "stream": 7, "correlation": 6779, "bytes": 33280, "memory bandwidth (GB/s)": 12.525404591644712}}, {"ph": "f", "id": 6779, "pid": 0, "tid": 7, "ts": 1742522672416256, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416249, "dur": 4, "args": {"External id": 6779, "cbid": 41, "correlation": 6779}}, {"ph": "s", "id": 6779, "pid": 494, "tid": 494, "ts": 1742522672416249, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416254, "dur": 7, "args": {"External id": 6780, "cbid": 131, "correlation": 6780}}, {"ph": "s", "id": 6780, "pid": 494, "tid": 494, "ts": 1742522672416254, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416273, "dur": 0, "args": {"External id": 6793, "device": 0, "context": 1, "stream": 7, "correlation": 6793, "bytes": 256, "memory bandwidth (GB/s)": 0.3473541383989145}}, {"ph": "f", "id": 6793, "pid": 0, "tid": 7, "ts": 1742522672416273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416268, "dur": 3, "args": {"External id": 6793, "cbid": 41, "correlation": 6793}}, {"ph": "s", "id": 6793, "pid": 494, "tid": 494, "ts": 1742522672416268, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416271, "dur": 5, "args": {"External id": 6794, "cbid": 131, "correlation": 6794}}, {"ph": "s", "id": 6794, "pid": 494, "tid": 494, "ts": 1742522672416271, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416294, "dur": 0, "args": {"External id": 6815, "device": 0, "context": 1, "stream": 7, "correlation": 6815, "bytes": 512, "memory bandwidth (GB/s)": 0.64}}, {"ph": "f", "id": 6815, "pid": 0, "tid": 7, "ts": 1742522672416294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416289, "dur": 3, "args": {"External id": 6815, "cbid": 41, "correlation": 6815}}, {"ph": "s", "id": 6815, "pid": 494, "tid": 494, "ts": 1742522672416289, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416292, "dur": 5, "args": {"External id": 6816, "cbid": 131, "correlation": 6816}}, {"ph": "s", "id": 6816, "pid": 494, "tid": 494, "ts": 1742522672416292, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416432, "dur": 0, "args": {"External id": 6829, "device": 0, "context": 1, "stream": 7, "correlation": 6829, "bytes": 2048, "memory bandwidth (GB/s)": 2.373117033603708}}, {"ph": "f", "id": 6829, "pid": 0, "tid": 7, "ts": 1742522672416432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416425, "dur": 4, "args": {"External id": 6829, "cbid": 41, "correlation": 6829}}, {"ph": "s", "id": 6829, "pid": 494, "tid": 494, "ts": 1742522672416425, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416430, "dur": 5, "args": {"External id": 6830, "cbid": 131, "correlation": 6830}}, {"ph": "s", "id": 6830, "pid": 494, "tid": 494, "ts": 1742522672416430, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416448, "dur": 0, "args": {"External id": 6843, "device": 0, "context": 1, "stream": 7, "correlation": 6843, "bytes": 1024, "memory bandwidth (GB/s)": 1.28}}, {"ph": "f", "id": 6843, "pid": 0, "tid": 7, "ts": 1742522672416448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416443, "dur": 2, "args": {"External id": 6843, "cbid": 41, "correlation": 6843}}, {"ph": "s", "id": 6843, "pid": 494, "tid": 494, "ts": 1742522672416443, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416446, "dur": 5, "args": {"External id": 6844, "cbid": 131, "correlation": 6844}}, {"ph": "s", "id": 6844, "pid": 494, "tid": 494, "ts": 1742522672416446, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416464, "dur": 0, "args": {"External id": 6857, "device": 0, "context": 1, "stream": 7, "correlation": 6857, "bytes": 1024, "memory bandwidth (GB/s)": 1.3315994798439532}}, {"ph": "f", "id": 6857, "pid": 0, "tid": 7, "ts": 1742522672416464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416458, "dur": 3, "args": {"External id": 6857, "cbid": 41, "correlation": 6857}}, {"ph": "s", "id": 6857, "pid": 494, "tid": 494, "ts": 1742522672416458, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416462, "dur": 5, "args": {"External id": 6858, "cbid": 131, "correlation": 6858}}, {"ph": "s", "id": 6858, "pid": 494, "tid": 494, "ts": 1742522672416462, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416485, "dur": 0, "args": {"External id": 6879, "device": 0, "context": 1, "stream": 7, "correlation": 6879, "bytes": 512, "memory bandwidth (GB/s)": 0.6657997399219766}}, {"ph": "f", "id": 6879, "pid": 0, "tid": 7, "ts": 1742522672416485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416480, "dur": 2, "args": {"External id": 6879, "cbid": 41, "correlation": 6879}}, {"ph": "s", "id": 6879, "pid": 494, "tid": 494, "ts": 1742522672416480, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416483, "dur": 5, "args": {"External id": 6880, "cbid": 131, "correlation": 6880}}, {"ph": "s", "id": 6880, "pid": 494, "tid": 494, "ts": 1742522672416483, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416506, "dur": 0, "args": {"External id": 6901, "device": 0, "context": 1, "stream": 7, "correlation": 6901, "bytes": 1024, "memory bandwidth (GB/s)": 1.3333333333333333}}, {"ph": "f", "id": 6901, "pid": 0, "tid": 7, "ts": 1742522672416506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416501, "dur": 3, "args": {"External id": 6901, "cbid": 41, "correlation": 6901}}, {"ph": "s", "id": 6901, "pid": 494, "tid": 494, "ts": 1742522672416501, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416505, "dur": 5, "args": {"External id": 6902, "cbid": 131, "correlation": 6902}}, {"ph": "s", "id": 6902, "pid": 494, "tid": 494, "ts": 1742522672416505, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416706, "dur": 1, "args": {"External id": 6913, "device": 0, "context": 1, "stream": 7, "correlation": 6913, "bytes": 2048, "memory bandwidth (GB/s)": 1.68559670781893}}, {"ph": "f", "id": 6913, "pid": 0, "tid": 7, "ts": 1742522672416706, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416690, "dur": 14, "args": {"External id": 6913, "cbid": 41, "correlation": 6913}}, {"ph": "s", "id": 6913, "pid": 494, "tid": 494, "ts": 1742522672416690, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416733, "dur": 1, "args": {"External id": 6930, "device": 0, "context": 1, "stream": 7, "correlation": 6930, "bytes": 2048, "memory bandwidth (GB/s)": 1.7762359063313096}}, {"ph": "f", "id": 6930, "pid": 0, "tid": 7, "ts": 1742522672416733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416725, "dur": 5, "args": {"External id": 6930, "cbid": 41, "correlation": 6930}}, {"ph": "s", "id": 6930, "pid": 494, "tid": 494, "ts": 1742522672416725, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416751, "dur": 1, "args": {"External id": 6947, "device": 0, "context": 1, "stream": 7, "correlation": 6947, "bytes": 2048, "memory bandwidth (GB/s)": 1.391304347826087}}, {"ph": "f", "id": 6947, "pid": 0, "tid": 7, "ts": 1742522672416751, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416743, "dur": 4, "args": {"External id": 6947, "cbid": 41, "correlation": 6947}}, {"ph": "s", "id": 6947, "pid": 494, "tid": 494, "ts": 1742522672416743, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416769, "dur": 1, "args": {"External id": 6964, "device": 0, "context": 1, "stream": 7, "correlation": 6964, "bytes": 256, "memory bandwidth (GB/s)": 0.1906180193596426}}, {"ph": "f", "id": 6964, "pid": 0, "tid": 7, "ts": 1742522672416769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416762, "dur": 5, "args": {"External id": 6964, "cbid": 41, "correlation": 6964}}, {"ph": "s", "id": 6964, "pid": 494, "tid": 494, "ts": 1742522672416762, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416790, "dur": 1, "args": {"External id": 6984, "device": 0, "context": 1, "stream": 7, "correlation": 6984, "bytes": 512, "memory bandwidth (GB/s)": 0.43243243243243246}}, {"ph": "f", "id": 6984, "pid": 0, "tid": 7, "ts": 1742522672416790, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416783, "dur": 4, "args": {"External id": 6984, "cbid": 41, "correlation": 6984}}, {"ph": "s", "id": 6984, "pid": 494, "tid": 494, "ts": 1742522672416783, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<int>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<int>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672416841, "dur": 1, "args": {"External id": 7010, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7010, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 1.939394, "grid": [64, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 3}}, {"ph": "f", "id": 7010, "pid": 0, "tid": 7, "ts": 1742522672416841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672416812, "dur": 27, "args": {"External id": 7010, "cbid": 211, "correlation": 7010}}, {"ph": "s", "id": 7010, "pid": 494, "tid": 494, "ts": 1742522672416812, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672416869, "dur": 2, "args": {"External id": 7023, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7023, "registers per thread": 18, "shared memory": 0, "blocks per SM": 0.25, "warps per SM": 1, "grid": [33, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7023, "pid": 0, "tid": 7, "ts": 1742522672416869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672416862, "dur": 6, "args": {"External id": 7023, "cbid": 211, "correlation": 7023}}, {"ph": "s", "id": 7023, "pid": 494, "tid": 494, "ts": 1742522672416862, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pageable)", "pid": 0, "tid": 7, "ts": 1742522672416915, "dur": 2, "args": {"External id": 7041, "device": 0, "context": 1, "stream": 7, "correlation": 7041, "bytes": 512, "memory bandwidth (GB/s)": 0.2318840579710145}}, {"ph": "f", "id": 7041, "pid": 0, "tid": 7, "ts": 1742522672416915, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416906, "dur": 14, "args": {"External id": 7041, "cbid": 41, "correlation": 7041}}, {"ph": "s", "id": 7041, "pid": 494, "tid": 494, "ts": 1742522672416906, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416922, "dur": 4, "args": {"External id": 7042, "cbid": 131, "correlation": 7042}}, {"ph": "s", "id": 7042, "pid": 494, "tid": 494, "ts": 1742522672416922, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672416962, "dur": 0, "args": {"External id": 7048, "device": 0, "context": 1, "stream": 7, "correlation": 7048, "bytes": 1024, "memory bandwidth (GB/s)": 1.2322503008423586}}, {"ph": "f", "id": 7048, "pid": 0, "tid": 7, "ts": 1742522672416962, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672416956, "dur": 4, "args": {"External id": 7048, "cbid": 41, "correlation": 7048}}, {"ph": "s", "id": 7048, "pid": 494, "tid": 494, "ts": 1742522672416956, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672416961, "dur": 5, "args": {"External id": 7049, "cbid": 131, "correlation": 7049}}, {"ph": "s", "id": 7049, "pid": 494, "tid": 494, "ts": 1742522672416961, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamIsCapturing", "pid": 494, "tid": 494, "ts": 1742522672416984, "dur": 0, "args": {"External id": 7059, "cbid": 317, "correlation": 7059}}, {"ph": "f", "id": 7059, "pid": 494, "tid": 494, "ts": 1742522672416984, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamIsCapturing", "pid": 494, "tid": 494, "ts": 1742522672416987, "dur": 0, "args": {"External id": 7063, "cbid": 317, "correlation": 7063}}, {"ph": "f", "id": 7063, "pid": 494, "tid": 494, "ts": 1742522672416987, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<long>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<long>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672417018, "dur": 1, "args": {"External id": 7066, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7066, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7066, "pid": 0, "tid": 7, "ts": 1742522672417018, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672416992, "dur": 24, "args": {"External id": 7066, "cbid": 211, "correlation": 7066}}, {"ph": "s", "id": 7066, "pid": 494, "tid": 494, "ts": 1742522672416992, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<long>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<long>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672417026, "dur": 1, "args": {"External id": 7072, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7072, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7072, "pid": 0, "tid": 7, "ts": 1742522672417026, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672417020, "dur": 3, "args": {"External id": 7072, "cbid": 211, "correlation": 7072}}, {"ph": "s", "id": 7072, "pid": 494, "tid": 494, "ts": 1742522672417020, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<int>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<int>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672417085, "dur": 1, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::indexSelectLargeIndex<c10::BFloat16, long, unsigned int, 2, 2, -2, true>(at::cuda::detail::TensorInfo<c10::BFloat16, unsigned int>, at::cuda::detail::TensorInfo<c10::BFloat16, unsigned int>, at::cuda::detail::TensorInfo<long, unsigned int>, int, int, unsigned int, unsigned int, long)", "pid": 0, "tid": 7, "ts": 1742522672417087, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 8, "warps per SM": 32, "grid": [1056, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 50}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672417099, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 86, "shared memory": 256, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672417104, "dur": 14, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417104, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672417118, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [512, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417118, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672417121, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 5.818182, "warps per SM": 23.272728, "grid": [768, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 36}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417121, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 128u, 128u, 5u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672417123, "dur": 26, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199296, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417123, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672417151, "dur": 29, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [8, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417151, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672417181, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "get_mla_metadata_kernel(Mla_metadata_params)", "pid": 0, "tid": 7, "ts": 1742522672417192, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 32768, "blocks per SM": 0.007575758, "warps per SM": 0.007575758, "grid": [1, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417192, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672417204, "dur": 466, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672417671, "dur": 24, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 248.24243, "warps per SM": 992.9697, "grid": [32768, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672417696, "dur": 26, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [2, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672417723, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 62.060608, "warps per SM": 248.24243, "grid": [8192, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672417732, "dur": 54, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 217184, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672417787, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417787, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672417792, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 27.151516, "warps per SM": 108.606064, "grid": [3584, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<36864u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672417798, "dur": 131, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417798, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672417930, "dur": 33, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 18432u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672417964, "dur": 60, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 217312, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672417964, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672418025, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672418037, "dur": 14, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418037, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672418052, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [512, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418052, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672418055, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 5.818182, "warps per SM": 23.272728, "grid": [768, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 36}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418055, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 128u, 128u, 5u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672418057, "dur": 25, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199296, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418057, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672418084, "dur": 30, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [8, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672418114, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418114, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672418127, "dur": 466, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418127, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672418594, "dur": 25, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 248.24243, "warps per SM": 992.9697, "grid": [32768, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672418624, "dur": 26, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [2, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672418650, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 62.060608, "warps per SM": 248.24243, "grid": [8192, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672418660, "dur": 55, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 217184, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672418660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672419858, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672419858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672419863, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 27.151516, "warps per SM": 108.606064, "grid": [3584, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672419863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<36864u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672419868, "dur": 136, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672419868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672420005, "dur": 33, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420005, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 18432u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672420038, "dur": 66, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 217312, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420038, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672420106, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420106, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672420112, "dur": 14, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672420127, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [512, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420127, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672420130, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 5.818182, "warps per SM": 23.272728, "grid": [768, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 36}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 128u, 128u, 5u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672420133, "dur": 26, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199296, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672420160, "dur": 30, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [8, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420160, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672420191, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672420201, "dur": 465, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420201, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672420667, "dur": 25, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 248.24243, "warps per SM": 992.9697, "grid": [32768, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672420692, "dur": 26, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [2, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672420719, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 62.060608, "warps per SM": 248.24243, "grid": [8192, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672420728, "dur": 55, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 217184, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672420784, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672420789, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 27.151516, "warps per SM": 108.606064, "grid": [3584, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<36864u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672420795, "dur": 132, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672420928, "dur": 33, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420928, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 18432u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672420961, "dur": 60, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 217312, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672420961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::index_elementwise_kernel<128, 4, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1}>(long, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672421022, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 27.151516, "warps per SM": 108.606064, "grid": [3584, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 75}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421022, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::index_elementwise_kernel<128, 4, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1}>(long, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672421034, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 27.151516, "warps per SM": 108.606064, "grid": [3584, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 75}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421034, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::index_elementwise_kernel<128, 4, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<8> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<8> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1}>(long, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<8> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<8> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672421047, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::index_elementwise_kernel<128, 4, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<8> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<8> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1}>(long, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<8> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<8> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672421050, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::index_elementwise_kernel<128, 4, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<4> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<4> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1}>(long, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<4> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<4> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672421053, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::index_elementwise_kernel<128, 4, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<4> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<4> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1}>(long, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<4> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<4> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672421056, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 1.939394, "grid": [64, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 3}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421056, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::index_elementwise_kernel<128, 4, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<1> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<1> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1}>(long, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<1> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<1> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672421061, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672421064, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421064, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672421068, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672421081, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421081, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672421083, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672421085, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672421106, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421106, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672421124, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421124, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "get_mla_metadata_kernel(Mla_metadata_params)", "pid": 0, "tid": 7, "ts": 1742522672421131, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 32768, "blocks per SM": 0.007575758, "warps per SM": 0.007575758, "grid": [1, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421131, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672421141, "dur": 245, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421141, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672421387, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672421399, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421399, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672421414, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672421420, "dur": 48, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421420, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672421469, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672421473, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421473, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672421480, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421480, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672421483, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421483, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672421485, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672421493, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672421512, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421512, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672421515, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672421532, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421532, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672421536, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421536, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672421546, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672421551, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672421564, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672421567, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672421569, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672421588, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672421605, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672421614, "dur": 1603, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672421614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672423217, "dur": 83, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672423300, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672423309, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672423349, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "get_mla_metadata_kernel(Mla_metadata_params)", "pid": 0, "tid": 7, "ts": 1742522672423369, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 32768, "blocks per SM": 0.007575758, "warps per SM": 0.007575758, "grid": [1, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672423379, "dur": 245, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423379, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672423625, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672423636, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672423652, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423652, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672423657, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672423707, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423707, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672423712, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672423718, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672423721, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423721, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672423724, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423724, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672423732, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672423750, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672423753, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423753, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672423771, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423771, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672423774, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672423792, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672423796, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423796, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672423807, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423807, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672423811, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423811, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672423825, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423825, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672423827, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672423830, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423830, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672423849, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672423866, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672423875, "dur": 67, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672423942, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672423942, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672424023, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672424031, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672424071, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672424092, "dur": 245, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424092, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672424338, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672424349, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672424366, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424366, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672424371, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672424422, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672424426, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424426, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672424433, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672424436, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424436, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672424438, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424438, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672424446, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424446, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672424464, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672424468, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672424485, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672424489, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672424507, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672424511, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672424522, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672424527, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672424541, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424541, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672424543, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672424545, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424545, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672424565, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672424582, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672424591, "dur": 68, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424591, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672424659, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672424739, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424739, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672424748, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424748, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672424788, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672424808, "dur": 245, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672424808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672425054, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672425066, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672425082, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425088, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425088, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672425138, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425138, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672425142, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425142, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672425149, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672425152, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672425155, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672425163, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425163, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672425180, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672425183, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672425201, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425201, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425204, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672425224, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425228, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672425238, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425243, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672425257, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672425259, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425259, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425261, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672425281, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425281, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672425298, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672425306, "dur": 58, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425306, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425365, "dur": 82, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672425448, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425456, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425456, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672425497, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425497, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672425517, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425517, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672425761, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425761, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672425772, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425772, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672425788, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425794, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672425846, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425846, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672425850, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425850, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672425856, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425856, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672425860, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672425862, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425862, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672425870, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672425888, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672425891, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672425910, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425913, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425913, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672425931, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425935, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672425945, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425945, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425950, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672425964, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425964, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672425966, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672425968, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672425988, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672425988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672426005, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426005, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672426013, "dur": 65, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672426079, "dur": 81, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672426161, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426161, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672426169, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672426209, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426209, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672426232, "dur": 244, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672426477, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426477, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672426488, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426488, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672426504, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426504, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672426510, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672426561, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672426565, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672426572, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426572, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672426575, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672426578, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672426585, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672426603, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426603, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672426607, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672426625, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672426628, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426628, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672426646, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672426650, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672426661, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672426665, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426665, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672426679, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672426682, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672426684, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426684, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672426703, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426703, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672426721, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426721, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672426729, "dur": 59, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672426789, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672426869, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672426877, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426877, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672426918, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672426938, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672426938, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672427182, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427182, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672427194, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672427210, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672427215, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427215, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672427266, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672427271, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672427277, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672427280, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672427283, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672427291, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672427309, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672427312, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672427332, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672427335, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427335, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672427353, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427353, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672427357, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427357, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672427367, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672427372, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672427386, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427386, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672427388, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672427390, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672427410, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427410, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672427427, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672427436, "dur": 63, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427436, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672427499, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427499, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672427580, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427580, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672427588, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672427629, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427629, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672427649, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427649, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672427892, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672427904, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427904, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672427920, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427920, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672427925, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672427977, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427977, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672427981, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427981, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672427988, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672427991, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427991, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672427994, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672427994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672428002, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672428020, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672428023, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672428041, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428044, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428044, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672428062, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428066, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672428077, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428082, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672428095, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672428098, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428098, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428100, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428100, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672428119, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428119, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672428136, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672428144, "dur": 66, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428144, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428211, "dur": 81, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428211, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672428293, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428293, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428301, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428301, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672428341, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672428362, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428362, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672428606, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672428618, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428618, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672428634, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428634, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428639, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672428691, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672428695, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672428702, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428702, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672428705, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428705, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672428708, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428708, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672428715, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672428733, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672428736, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428736, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672428756, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428759, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672428778, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428778, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428782, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428782, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672428792, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428797, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672428811, "dur": 1, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428811, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672428813, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428815, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672428835, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428835, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672428853, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428853, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672428861, "dur": 63, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428861, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672428925, "dur": 74, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672428999, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672428999, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672429007, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429007, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672429040, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429040, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672429060, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429060, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672429304, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672429316, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429316, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672429332, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672429338, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672429389, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672429393, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429393, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672429399, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429399, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672429402, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672429405, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429405, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672429412, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672429431, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672429434, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429434, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672429452, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672429456, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429456, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672429474, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672429478, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672429489, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672429493, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672429507, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672429510, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672429512, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429512, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672429532, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429532, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672429550, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672429558, "dur": 81, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672429640, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672429721, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429721, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672429730, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429730, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672429770, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672429792, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672429792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672430035, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430035, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672430046, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430046, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672430062, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430067, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672430119, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430119, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672430123, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430123, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672430130, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672430133, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672430136, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672430143, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430143, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672430161, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430161, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672430164, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430164, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672430182, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430182, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430185, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672430204, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430208, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672430218, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430218, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430224, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672430237, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672430240, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430242, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672430262, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672430279, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430279, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672430287, "dur": 64, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430351, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430351, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672430430, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430438, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430438, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672430478, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672430498, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672430743, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672430754, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430754, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672430770, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430776, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430776, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672430827, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672430831, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430831, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672430838, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430838, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672430841, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672430844, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672430851, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430851, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672430869, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672430872, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430872, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672430890, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430890, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430893, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672430911, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430915, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430915, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672430926, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430931, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672430944, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672430947, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430947, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672430949, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672430969, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672430986, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672430994, "dur": 61, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672430994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672431056, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431056, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672431137, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431137, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672431145, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672431185, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672431207, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431207, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672431449, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431449, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672431461, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672431477, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431477, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672431482, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672431534, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431534, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672431538, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672431545, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431545, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672431548, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672431550, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672431557, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672431575, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672431578, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672431597, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672431600, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672431619, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672431623, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431623, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672431633, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431633, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672431638, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672431651, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672431654, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672431656, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672431676, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431676, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672431693, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431693, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672431701, "dur": 66, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672431767, "dur": 82, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431767, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672431850, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431850, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672431858, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672431898, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431898, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672431918, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672431918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672432162, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672432173, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672432189, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672432194, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672432246, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432246, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672432250, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672432257, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672432260, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672432263, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672432270, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672432288, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432288, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672432291, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672432310, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432310, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672432313, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672432332, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672432336, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432336, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672432347, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672432351, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432351, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672432365, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672432367, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672432369, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672432389, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672432407, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432407, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672432415, "dur": 63, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432415, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672432478, "dur": 75, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672432554, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672432562, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672432595, "dur": 22, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672432618, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432618, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672432861, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432861, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672432873, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672432889, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672432895, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432895, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672432946, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432946, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672432950, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672432957, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672432960, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432960, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672432963, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672432970, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672432988, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672432991, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672432991, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672433009, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433013, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672433031, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433036, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672433046, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433046, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433051, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672433065, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433065, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672433067, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433069, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433069, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672433089, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433089, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672433106, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433106, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672433114, "dur": 75, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433114, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433189, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672433267, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433274, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672433307, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433307, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672433327, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433327, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672433569, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672433581, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672433597, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433602, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433602, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672433653, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672433657, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672433664, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433664, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672433667, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672433670, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433670, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672433678, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672433695, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672433699, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672433716, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433719, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672433738, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433738, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433742, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433742, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672433753, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433753, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433758, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433758, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672433771, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433771, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672433774, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433776, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433776, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672433795, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672433812, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672433820, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433900, "dur": 74, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672433974, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672433982, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672433982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672434015, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672434035, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434035, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672434278, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434278, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672434290, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672434306, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434306, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672434312, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672434363, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434363, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672434367, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672434374, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434374, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672434377, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672434380, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434380, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672434387, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672434405, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434405, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672434408, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672434427, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672434430, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672434448, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672434452, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672434463, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672434468, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672434481, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672434484, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434484, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672434486, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672434505, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672434522, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672434531, "dur": 82, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672434614, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672434694, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672434703, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434703, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672434743, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672434764, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672434764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672435008, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672435020, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672435037, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435037, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435042, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672435092, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435092, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672435097, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672435103, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672435106, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435106, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672435109, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672435117, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435117, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672435135, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435135, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672435138, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435138, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672435156, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435156, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435159, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672435177, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435181, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672435192, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435192, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435197, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435197, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672435210, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672435212, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435214, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435214, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672435234, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672435252, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435252, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672435260, "dur": 65, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435325, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435325, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672435406, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435415, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435415, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672435455, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672435475, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435475, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672435716, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672435728, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672435744, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435750, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672435801, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435801, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672435805, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435805, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672435812, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672435815, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672435818, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672435826, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672435844, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672435847, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672435866, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435869, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672435887, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435891, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672435902, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435902, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435907, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435907, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672435920, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435920, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672435923, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672435925, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672435946, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435946, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672435962, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435962, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672435971, "dur": 66, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672435971, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672436037, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436037, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672436116, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672436125, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436125, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672436166, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672436187, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672436430, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672436442, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672436458, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436458, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672436463, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672436514, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436514, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672436519, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436519, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672436525, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436525, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672436528, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672436531, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672436539, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672436556, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436556, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672436560, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672436578, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672436581, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672436599, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436599, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672436604, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672436614, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672436619, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672436633, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436633, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672436636, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672436638, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672436657, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672436674, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436674, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672436682, "dur": 65, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672436748, "dur": 75, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436748, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672436824, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672436832, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436832, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672436865, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672436885, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672436885, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672437127, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437127, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672437139, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672437154, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672437160, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437160, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672437210, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672437214, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437214, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672437221, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437221, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672437224, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672437227, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672437234, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672437252, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437252, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672437255, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672437274, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672437277, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672437295, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672437299, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437299, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672437309, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672437314, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672437328, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672437331, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437331, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672437333, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437333, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672437352, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437352, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672437369, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672437378, "dur": 86, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672437465, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672437542, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437542, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672437550, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672437583, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672437604, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672437847, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672437859, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437859, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672437875, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672437880, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437880, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672437931, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672437935, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672437942, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437942, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672437945, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437945, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672437947, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437947, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672437955, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672437973, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437973, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672437976, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437976, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672437995, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672437998, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672437998, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672438016, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438016, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438021, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438021, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672438031, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438036, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672438050, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672438052, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438052, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438054, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672438074, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438074, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672438091, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672438099, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438177, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672438258, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438258, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438267, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672438307, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438307, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672438328, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672438571, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438571, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672438583, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672438599, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438599, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438604, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672438656, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672438660, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672438666, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672438669, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438669, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672438672, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672438680, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672438698, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672438701, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672438719, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438722, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438722, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672438741, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438741, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438745, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672438756, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438761, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438761, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672438775, "dur": 1, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672438777, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438779, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672438799, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438799, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672438816, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438816, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672438824, "dur": 65, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438889, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672438969, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672438978, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672438978, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672439017, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439017, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672439038, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439038, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672439282, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672439293, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439293, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672439310, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439310, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672439315, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672439367, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672439371, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672439378, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672439381, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672439383, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672439391, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672439409, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672439412, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672439432, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672439435, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439435, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672439453, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439453, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672439457, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439457, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672439467, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672439472, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439472, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672439486, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672439488, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439488, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672439490, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672439510, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672439526, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672439535, "dur": 62, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672439597, "dur": 81, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672439679, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672439687, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672439727, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672439749, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672439992, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672439992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672440004, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672440020, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440025, "dur": 52, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672440077, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672440082, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672440088, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440088, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672440091, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672440094, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672440102, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440102, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672440119, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440119, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672440123, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440123, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672440141, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440141, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440144, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440144, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672440162, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440166, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672440177, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440181, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672440195, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440195, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672440198, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440198, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440200, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672440219, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440219, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672440236, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440236, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672440244, "dur": 65, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440244, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440309, "dur": 73, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672440383, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440391, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672440424, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672440445, "dur": 240, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672440686, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440686, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672440698, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672440714, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440719, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672440771, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440771, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672440775, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672440782, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440782, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672440785, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440785, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672440788, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672440795, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672440813, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672440817, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672440835, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440835, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440838, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440838, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672440856, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440856, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440860, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672440870, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440875, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672440889, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672440891, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672440893, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672440912, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672440929, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672440939, "dur": 83, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672440939, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672441022, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441022, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672441102, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441102, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672441111, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672441152, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672441172, "dur": 240, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441172, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672441414, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672441425, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441425, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672441442, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672441447, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441447, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672441498, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672441502, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441502, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672441509, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441509, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672441512, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441512, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672441515, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672441523, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672441541, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441541, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672441544, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441544, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672441562, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672441565, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672441584, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672441588, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672441599, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441599, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672441603, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441603, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672441617, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672441619, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672441621, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672441641, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672441658, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441658, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672441667, "dur": 71, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672441738, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441738, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672441820, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672441828, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441828, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672441869, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672441890, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672441890, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672442135, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442135, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672442146, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672442163, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442163, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672442168, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672442219, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442219, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672442223, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442223, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672442230, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672442233, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442233, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672442236, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442236, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672442243, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672442261, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672442264, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442264, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672442282, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672442285, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442285, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672442304, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672442308, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442308, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672442318, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672442323, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672442337, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672442340, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442340, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672442342, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442342, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672442361, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672442378, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672442387, "dur": 62, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672442450, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672442529, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442529, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672442537, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672442577, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442577, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672442597, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672442841, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672442853, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442853, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672442869, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672442874, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442874, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672442925, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672442929, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672442936, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442936, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672442939, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442939, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672442942, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442942, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672442950, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672442967, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672442970, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672442989, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672442992, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672442992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672443010, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443014, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443014, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672443025, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443030, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672443044, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443044, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672443046, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443046, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443048, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672443068, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672443085, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672443093, "dur": 66, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443160, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443160, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672443239, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443239, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443248, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443248, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672443288, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443288, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672443308, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443308, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672443551, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672443562, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672443578, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443583, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672443634, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443634, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672443638, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672443645, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443645, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672443648, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443648, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672443651, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672443659, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672443677, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672443680, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672443698, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443701, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672443719, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443723, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672443733, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443738, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443738, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672443752, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672443755, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443757, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672443777, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672443794, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672443802, "dur": 71, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443802, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443874, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443874, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672443955, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672443963, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672443963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672444003, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444003, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672444023, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672444265, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672444277, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672444292, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444292, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672444298, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672444349, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672444353, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444353, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672444360, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672444363, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444363, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672444365, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672444373, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444373, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672444390, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672444394, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672444412, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672444415, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444415, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672444433, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672444437, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444437, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672444448, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672444453, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444453, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672444466, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444466, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672444469, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672444471, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672444491, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672444508, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444508, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672444516, "dur": 74, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444516, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672444591, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444591, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672444672, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672444680, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672444720, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444720, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672444740, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672444983, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444983, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672444994, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672444994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672445010, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445015, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672445067, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672445071, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672445078, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672445081, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445081, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672445084, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672445091, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672445109, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672445112, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672445130, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445133, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672445152, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445156, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445156, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672445167, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445171, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672445185, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672445187, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445190, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445190, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672445209, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445209, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672445226, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445226, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672445234, "dur": 66, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445300, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672445381, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445390, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672445430, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672445450, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672445694, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672445706, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445706, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672445722, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445722, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445727, "dur": 52, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672445780, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672445785, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445785, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672445791, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445791, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672445794, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672445797, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672445805, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445805, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672445822, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445822, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672445826, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672445843, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445847, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672445864, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445864, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445869, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672445879, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445884, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672445898, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445898, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672445900, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672445902, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445902, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672445922, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672445939, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445939, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672445948, "dur": 68, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672445948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672446017, "dur": 75, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446017, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672446093, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672446100, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446100, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672446133, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672446153, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446153, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672446395, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446395, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672446407, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446407, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672446423, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446423, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672446428, "dur": 53, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672446482, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672446486, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672446492, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446492, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672446495, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672446498, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672446506, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672446524, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446524, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672446528, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672446546, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672446549, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672446567, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672446571, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446571, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672446582, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672446587, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446587, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672446600, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672446603, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446603, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672446605, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672446625, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672446642, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446642, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672446651, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672446729, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672446808, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672446815, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672446848, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672446868, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672446868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672447111, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672447123, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447123, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672447139, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672447144, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447144, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672447195, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447195, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672447200, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672447206, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447206, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672447209, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447209, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672447212, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672447220, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672447238, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672447241, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447241, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672447260, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672447264, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447264, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672447282, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672447286, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447286, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672447296, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447296, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672447301, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447301, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672447315, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672447318, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672447320, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447320, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672447339, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447339, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672447356, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672447365, "dur": 72, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672447437, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447437, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672447519, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447519, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672447528, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672447568, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447568, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672447589, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672447831, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447831, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672447843, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672447860, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672447865, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672447917, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447917, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672447921, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447921, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672447927, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447927, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672447930, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672447933, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672447941, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447941, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672447958, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672447961, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672447979, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672447982, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672447982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672448001, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448001, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448005, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448005, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672448015, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448020, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672448033, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672448036, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448038, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448038, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672448058, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672448075, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448075, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672448084, "dur": 67, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448151, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448151, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672448231, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448231, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448240, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672448280, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672448301, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448301, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672448544, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448544, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672448556, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448556, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672448572, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448572, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448577, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448577, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672448628, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448628, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672448632, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672448639, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672448642, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448642, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672448645, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448645, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672448653, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672448670, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448670, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672448673, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672448692, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448695, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672448714, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448718, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672448728, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448733, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672448746, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448746, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672448749, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448751, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448751, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672448770, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672448788, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672448796, "dur": 70, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448796, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448867, "dur": 81, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672448948, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672448957, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672448997, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672448997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672449017, "dur": 244, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449017, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672449263, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672449274, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672449290, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672449296, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449296, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672449347, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672449352, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449352, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672449358, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449358, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672449361, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672449364, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449364, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672449371, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672449389, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672449392, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672449410, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449410, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672449413, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449413, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672449431, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672449435, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449435, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672449446, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449446, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672449451, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449451, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672449465, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672449467, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672449469, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672449489, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672449506, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672449514, "dur": 60, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449514, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672449575, "dur": 75, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672449650, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672449658, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449658, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672449691, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672449710, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672449953, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449953, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672449964, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449964, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672449980, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672449986, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672449986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672450036, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672450040, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450040, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672450047, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672450050, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672450053, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672450061, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672450078, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672450082, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672450100, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450100, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450103, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672450121, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450121, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450125, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450125, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672450136, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450141, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450141, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672450155, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672450157, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450157, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450159, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672450178, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450178, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672450196, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450196, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672450204, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450281, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450281, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672450358, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450358, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450367, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672450407, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450407, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672450428, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672450671, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672450683, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450683, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672450699, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450704, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672450755, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672450759, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672450765, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450765, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672450768, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672450771, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450771, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672450779, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672450796, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450796, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672450799, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450799, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672450818, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450821, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672450840, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450840, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450844, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672450855, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450855, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450860, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672450873, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672450876, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450876, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450878, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672450897, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450897, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672450914, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450914, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672450923, "dur": 75, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672450998, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672450998, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672451079, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672451088, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451088, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672451129, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451129, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672451149, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672451391, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672451403, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451403, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672451419, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451419, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672451424, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672451476, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672451480, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451480, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672451486, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672451489, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672451492, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451492, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672451500, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672451517, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451517, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672451521, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672451539, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672451542, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451542, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672451562, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672451566, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451566, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672451576, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672451581, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672451595, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672451597, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672451600, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672451619, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672451636, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672451644, "dur": 68, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672451712, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672451790, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451790, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672451799, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451799, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672451838, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451838, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672451860, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672451860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672452103, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672452115, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672452131, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452131, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672452136, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672452187, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672452191, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672452198, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452198, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672452201, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452201, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672452204, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672452211, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452211, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672452229, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452229, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672452232, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672452250, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672452253, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672452271, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672452276, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452276, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672452286, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452286, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672452291, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672452304, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672452307, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452307, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672452309, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672452329, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452329, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672452345, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452345, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672452355, "dur": 65, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452355, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672452420, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452420, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672452498, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672452505, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672452538, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672452557, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672452800, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452800, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672452812, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672452828, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452828, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672452833, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672452886, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452886, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672452890, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452890, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672452896, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672452899, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452899, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672452902, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452902, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672452909, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452909, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672452927, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452927, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672452930, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672452949, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672452952, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672452970, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672452974, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672452984, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452984, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672452989, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672452989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672453003, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453003, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672453005, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453005, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672453007, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453007, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672453026, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453026, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672453044, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453044, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672453052, "dur": 83, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453052, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672453135, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453135, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672453214, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453214, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672453222, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672453261, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672453283, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672453528, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672453539, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672453555, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453555, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672453561, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672453612, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453612, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672453616, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672453622, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672453626, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453626, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672453628, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453628, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672453636, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672453654, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672453657, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672453675, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453675, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672453678, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672453697, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453697, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672453701, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672453712, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672453717, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672453730, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453730, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672453733, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672453735, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672453755, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672453773, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672453781, "dur": 64, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453781, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672453846, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453846, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672453924, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672453931, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672453964, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453964, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672453984, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672453984, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672454228, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672454239, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454239, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672454255, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672454260, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672454310, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454310, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672454315, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672454321, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672454324, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454324, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672454327, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454327, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672454335, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454335, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672454352, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454352, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672454356, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672454374, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454374, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672454377, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672454395, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454395, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672454400, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454400, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672454410, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454410, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672454415, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454415, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672454428, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672454431, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672454433, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672454453, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454453, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672454470, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672454478, "dur": 82, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672454561, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672454638, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672454645, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454645, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672454678, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672454699, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672454941, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454941, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672454953, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454953, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672454969, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672454974, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672454974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672455024, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455024, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672455029, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672455036, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672455039, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672455041, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672455049, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672455067, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672455071, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672455089, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455089, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455093, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672455111, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455115, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672455126, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455130, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672455144, "dur": 1, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455144, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672455146, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455148, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455148, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672455169, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672455186, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455186, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672455194, "dur": 71, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455266, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672455347, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455356, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672455396, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455396, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672455418, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455418, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672455662, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672455674, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455674, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672455689, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455689, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455695, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672455745, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672455749, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672455756, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672455759, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672455762, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455762, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672455769, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672455787, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455787, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672455790, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455790, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672455808, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455811, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455811, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672455829, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455833, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672455844, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455848, "dur": 14, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672455863, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672455865, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455868, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672455887, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672455904, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455904, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672455913, "dur": 66, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455913, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672455979, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672455979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672456058, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672456067, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672456107, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456107, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672456129, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456129, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672456373, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456373, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672456384, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456384, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672456400, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456400, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672456406, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672456457, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456457, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672456461, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672456468, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672456471, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672456474, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672456482, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672456499, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456499, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672456502, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456502, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672456520, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672456523, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672456542, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456542, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672456546, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672456556, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456556, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672456562, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672456576, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672456578, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672456580, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456580, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672456600, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672456617, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672456626, "dur": 63, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456626, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672456689, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456689, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672456767, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456767, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672456775, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672456815, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672456837, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672456837, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672457080, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457080, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672457092, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457092, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672457108, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457108, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672457113, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457113, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672457165, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457165, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672457169, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672457175, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457175, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672457178, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457178, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672457181, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672457189, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672457207, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457207, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672457210, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672457229, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457229, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672457232, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672457250, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672457254, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457254, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672457266, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672457270, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672457284, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457284, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672457287, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672457289, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672457308, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457308, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672457325, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457325, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672457333, "dur": 67, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457333, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672457401, "dur": 81, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457401, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672457483, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457483, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672457491, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672457532, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457532, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672457553, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457553, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672457797, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672457808, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672457824, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672457830, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457830, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672457881, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457881, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672457885, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457885, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672457891, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672457894, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457894, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672457897, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457897, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672457905, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457905, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672457922, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672457926, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672457944, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672457947, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457947, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672457966, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672457970, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672457980, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672457985, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457985, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672457999, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672457999, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672458002, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672458004, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672458023, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672458040, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458040, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672458049, "dur": 62, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672458111, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672458189, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672458197, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458197, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672458230, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672458249, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458249, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672458493, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672458505, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672458521, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672458527, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672458578, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672458582, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672458588, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672458591, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458591, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672458594, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672458601, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672458619, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672458622, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672458640, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672458644, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672458662, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672458666, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672458676, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458676, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672458681, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458681, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672458694, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672458697, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458697, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672458699, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672458719, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672458735, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672458744, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672458821, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672458902, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458902, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672458911, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672458951, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458951, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672458972, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672458972, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672459214, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459214, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672459225, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672459242, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672459247, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459247, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672459298, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672459302, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459302, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672459308, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459308, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672459311, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459311, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672459314, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672459322, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459322, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672459339, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459339, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672459343, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459343, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672459361, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672459365, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672459383, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672459387, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672459397, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459397, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672459402, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672459416, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459416, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672459418, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459418, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672459420, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459420, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672459440, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672459457, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459457, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672459466, "dur": 64, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459466, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672459530, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459530, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672459609, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672459618, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459618, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672459658, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459658, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672459679, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672459923, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672459935, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672459951, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459951, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672459956, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672459956, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672460008, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672460012, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460012, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672460018, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460018, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672460021, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460021, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672460024, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460024, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672460032, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460032, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672460050, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672460053, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672460071, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460074, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460074, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672460093, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460097, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672460107, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460107, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460112, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672460126, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672460129, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460129, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460131, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460131, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672460150, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672460167, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672460175, "dur": 68, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460175, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460244, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460244, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672460324, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460324, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460333, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460333, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672460374, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460374, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672460396, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460396, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672460640, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672460651, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672460667, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460673, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672460723, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672460728, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672460734, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672460737, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672460740, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672460748, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460748, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672460766, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672460769, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672460787, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460787, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460790, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460790, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672460809, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460813, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672460824, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460829, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672460842, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672460845, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460847, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672460867, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672460884, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672460892, "dur": 61, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672460954, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672460954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672461031, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672461039, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672461072, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461072, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672461092, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461092, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672461335, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461335, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672461346, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461346, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672461362, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461362, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672461368, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672461418, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461418, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672461422, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672461429, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461429, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672461432, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672461435, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461435, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672461442, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672461460, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461460, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672461463, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672461481, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672461485, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672461503, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461503, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672461507, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672461518, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672461522, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672461536, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461536, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672461539, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672461541, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461541, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672461560, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672461577, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461577, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672461585, "dur": 75, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672461661, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672461741, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461741, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672461749, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672461789, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672461810, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672461810, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672462053, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672462065, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462065, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672462082, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462087, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672462138, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462138, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672462142, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462142, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672462148, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462148, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672462152, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672462154, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672462162, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672462180, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672462183, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672462202, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462202, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462205, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462205, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672462224, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462228, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672462238, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462243, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672462257, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672462259, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462259, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462261, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672462282, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672462299, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462299, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672462307, "dur": 64, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462307, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462371, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672462450, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462459, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462459, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672462500, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672462521, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672462764, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672462776, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462776, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672462792, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462797, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672462848, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672462852, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462852, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672462859, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462859, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672462862, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462862, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672462864, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462864, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672462872, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462872, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672462890, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462890, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672462893, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672462911, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462914, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462914, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672462932, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462936, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462936, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672462947, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462947, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462952, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672462965, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462965, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672462967, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672462969, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672462989, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672462989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672463006, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463006, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672463014, "dur": 64, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463014, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672463078, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672463159, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672463167, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672463207, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463207, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672463228, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672463470, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672463482, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672463498, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672463503, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463503, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672463553, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463553, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672463558, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672463564, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672463567, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672463570, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672463578, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672463595, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672463599, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463599, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672463617, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672463620, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463620, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672463638, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672463642, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463642, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672463653, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672463657, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672463671, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672463673, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672463675, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463675, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672463695, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672463712, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672463720, "dur": 74, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463720, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672463794, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672463871, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672463878, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672463911, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672463933, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672463933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672464175, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464175, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672464187, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672464203, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464203, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672464208, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672464259, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464259, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672464263, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672464269, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464269, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672464272, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464272, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672464275, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672464283, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672464300, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672464304, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672464323, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672464326, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464326, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672464345, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464345, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672464349, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672464359, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464359, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672464364, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464364, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672464378, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672464381, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672464383, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672464402, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672464419, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464419, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672464427, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672464504, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464504, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672464584, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672464592, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464592, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672464633, "dur": 22, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464633, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672464656, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672464900, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672464911, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672464927, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464927, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672464933, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672464984, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464984, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672464988, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672464994, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672464997, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672464997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672465000, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465000, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672465008, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672465025, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672465029, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672465047, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465050, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672465068, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465072, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465072, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672465083, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465087, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672465101, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672465103, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465105, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672465125, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465125, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672465142, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465142, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672465150, "dur": 62, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465212, "dur": 74, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672465288, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465288, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465296, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465296, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672465329, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465329, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672465350, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672465593, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672465604, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672465621, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465626, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465626, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672465677, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672465681, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465681, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672465688, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672465691, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672465694, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672465701, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672465719, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672465722, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465722, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672465741, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465741, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465744, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672465762, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465762, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465766, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672465778, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465778, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465783, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672465796, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465796, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672465799, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465799, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465801, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465801, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672465820, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672465837, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465837, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672465845, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672465924, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672465924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672466002, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672466010, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672466050, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672466070, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466070, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672466313, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672466325, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466325, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672466341, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672466346, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466346, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672466398, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466398, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672466402, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672466409, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672466412, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672466414, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672466422, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672466440, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672466443, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466443, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672466461, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672466464, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672466482, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672466487, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466487, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672466497, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466497, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672466502, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466502, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672466515, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672466518, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672466520, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672466540, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466540, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672466556, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466556, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672466565, "dur": 71, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672466636, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672466717, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672466725, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466725, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672466765, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466765, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672466786, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672466786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672467028, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467028, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672467039, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672467055, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467055, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467061, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672467112, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672467116, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672467123, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467123, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672467126, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672467128, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467128, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672467136, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672467154, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672467157, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467157, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672467176, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467180, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672467197, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467197, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467201, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467201, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672467212, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467217, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672467230, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672467233, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467233, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467235, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467235, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672467254, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467254, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672467271, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672467280, "dur": 69, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467349, "dur": 73, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672467423, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467423, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467430, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672467463, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672467483, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467483, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672467726, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467726, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672467737, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672467753, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467753, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467759, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672467810, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467810, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672467814, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467814, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672467821, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672467824, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672467826, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672467834, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672467851, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467851, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672467855, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467855, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672467872, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467872, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467876, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467876, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672467894, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467894, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467898, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467898, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672467908, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467908, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467913, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467913, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672467927, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467927, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672467929, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672467931, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672467951, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467951, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672467968, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672467976, "dur": 84, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672467976, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672468061, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672468141, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468141, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672468149, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672468190, "dur": 22, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468190, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672468213, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468213, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672468455, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672468467, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672468484, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468484, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672468489, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672468539, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672468543, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672468549, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672468553, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468553, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672468555, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468555, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672468563, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468563, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672468581, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672468584, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672468604, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672468607, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672468625, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672468629, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468629, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672468640, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672468645, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468645, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672468658, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468658, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672468660, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672468662, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672468683, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468683, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672468700, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468700, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672468708, "dur": 67, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468708, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672468775, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672468855, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468855, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672468863, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672468903, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468903, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672468924, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672468924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672469167, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672469179, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469179, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672469195, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469195, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672469200, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672469251, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672469255, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672469261, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672469265, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672469267, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672469275, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672469293, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469293, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672469296, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469296, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672469314, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672469318, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672469336, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469336, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672469340, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469340, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672469350, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672469355, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469355, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672469368, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672469371, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672469373, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469373, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672469394, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672469411, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469411, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672469419, "dur": 66, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469419, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672469486, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672469567, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672469576, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672469617, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672469637, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469637, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672469879, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672469891, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672469908, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469908, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672469913, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469913, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672469964, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469964, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672469968, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672469974, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672469977, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469977, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672469980, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672469988, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672469988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672470006, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470006, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672470009, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672470027, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470030, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672470049, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470053, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672470063, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470063, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470068, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672470081, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470081, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672470084, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470086, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672470105, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672470122, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470122, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672470130, "dur": 65, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470196, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470196, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672470277, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470286, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470286, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672470326, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470326, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672470348, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470348, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672470593, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672470604, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672470620, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470620, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470626, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470626, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672470677, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672470681, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470681, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672470687, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672470691, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672470693, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470693, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672470701, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672470719, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672470723, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672470741, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470741, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470744, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672470763, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470763, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470767, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470767, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672470778, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470778, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470784, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672470797, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672470800, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470800, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470802, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470802, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672470821, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672470838, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470838, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672470847, "dur": 60, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470907, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470907, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672470987, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470987, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672470995, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672470995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672471035, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471035, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672471056, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471056, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672471300, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672471312, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672471328, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672471333, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471333, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672471384, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471384, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672471388, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672471395, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471395, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672471398, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471398, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672471400, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471400, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672471408, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672471426, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471426, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672471429, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471429, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672471448, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672471451, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471451, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672471470, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672471474, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672471485, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672471490, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672471504, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471504, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672471506, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672471508, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471508, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672471528, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672471545, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471545, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672471553, "dur": 62, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471553, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672471616, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672471697, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471697, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672471705, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471705, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672471746, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471746, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672471766, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672471766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672472009, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672472021, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472021, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672472037, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472037, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472042, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672472094, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672472099, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672472105, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672472108, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472108, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672472111, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672472119, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472119, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672472136, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672472140, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672472158, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472162, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672472180, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472184, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472184, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672472194, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472199, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672472213, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472213, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672472215, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472215, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472217, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672472237, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672472254, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472254, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672472262, "dur": 64, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472327, "dur": 81, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472327, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672472409, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472417, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672472458, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472458, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672472478, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672472721, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472721, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672472733, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672472749, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472754, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472754, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672472806, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472806, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672472810, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472810, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672472817, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672472820, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672472822, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472822, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672472830, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472830, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672472848, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672472851, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472851, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672472869, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472872, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472872, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672472891, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472895, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472895, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672472906, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472906, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472910, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672472924, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672472926, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672472928, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472928, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672472948, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672472965, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472965, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672472973, "dur": 62, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672472973, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672473036, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672473113, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473113, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672473120, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473120, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672473153, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473153, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672473173, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672473415, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473415, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672473427, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672473444, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672473449, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473449, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672473501, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473501, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672473505, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672473512, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473512, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672473515, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672473518, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672473525, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473525, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672473543, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672473546, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672473564, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672473567, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672473585, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672473589, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672473600, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672473605, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672473618, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473618, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672473621, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672473623, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473623, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672473642, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473642, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672473659, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672473668, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672473744, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672473825, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473825, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672473834, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672473873, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672473895, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672473895, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672474138, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474138, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672474150, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672474166, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672474172, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474172, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672474223, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474223, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672474227, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672474234, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672474237, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672474239, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474239, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672474247, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474247, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672474265, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672474268, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474268, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672474287, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672474290, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672474308, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474308, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672474312, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672474323, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672474328, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672474341, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672474343, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474343, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672474345, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474345, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672474365, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672474382, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474382, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672474391, "dur": 68, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672474459, "dur": 74, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474459, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672474534, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474534, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672474541, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474541, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672474575, "dur": 22, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672474597, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672474841, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672474853, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474853, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672474869, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672474874, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474874, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672474926, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672474930, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672474936, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474936, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672474940, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474940, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672474942, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474942, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672474950, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672474967, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672474971, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474971, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672474989, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672474992, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672474992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672475010, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475015, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672475025, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475030, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672475044, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475044, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672475046, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475046, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475048, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672475068, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672475085, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672475094, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475170, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475170, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672475250, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475258, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475258, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672475298, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672475319, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475319, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672475562, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672475574, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475574, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672475590, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475590, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475595, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672475646, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672475650, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672475656, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672475660, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672475662, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672475670, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475670, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672475688, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672475691, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672475710, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475713, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475713, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672475731, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475735, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672475745, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475750, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672475764, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672475766, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475768, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672475788, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672475805, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475805, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672475814, "dur": 68, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475814, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475882, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672475963, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672475971, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672475971, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672476011, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476011, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672476031, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672476275, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672476287, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672476303, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672476308, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476308, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672476359, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476359, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672476363, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476363, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672476369, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672476373, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476373, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672476375, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476375, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672476383, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672476402, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672476405, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476405, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672476424, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672476427, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672476445, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672476449, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476449, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672476460, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476460, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672476465, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672476478, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672476481, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672476483, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476483, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672476503, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476503, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672476520, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672476528, "dur": 64, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672476593, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672476674, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476674, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672476682, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672476722, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476722, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672476744, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672476986, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672476997, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672476997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672477013, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477019, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477019, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672477069, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477069, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672477073, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477073, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672477080, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477080, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672477083, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672477086, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672477094, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672477111, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672477115, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672477133, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477136, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672477154, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477158, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672477169, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477174, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477174, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672477187, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672477190, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477190, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477192, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477192, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672477211, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477211, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672477228, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672477237, "dur": 67, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477304, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672477382, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477382, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477391, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672477431, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672477452, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672477696, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672477707, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477707, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672477723, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477728, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672477780, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672477784, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672477791, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477791, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672477794, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672477797, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672477804, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477804, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672477822, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477822, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672477825, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477825, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672477845, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477848, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672477867, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477871, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672477881, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477881, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477886, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477886, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672477900, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672477903, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477903, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672477905, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477905, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672477924, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672477941, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477941, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672477950, "dur": 64, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672477950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672478014, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478014, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672478095, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672478104, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478104, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672478144, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478144, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672478164, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478164, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672478408, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672478419, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478419, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672478435, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478435, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672478440, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672478491, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672478495, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672478502, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478502, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672478505, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672478507, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672478515, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672478533, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478533, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672478536, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478536, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672478555, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478555, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672478558, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672478576, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672478580, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478580, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672478590, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478590, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672478596, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478596, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672478609, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672478612, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478612, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672478614, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672478633, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478633, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672478650, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672478658, "dur": 61, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478658, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672478720, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478720, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672478800, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478800, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672478809, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672478849, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672478869, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672478869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672479111, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672479123, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479123, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672479139, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672479144, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479144, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672479195, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479195, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672479199, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672479206, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479206, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672479209, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479209, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672479212, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672479219, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479219, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672479237, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672479240, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672479259, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479259, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672479262, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672479280, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672479284, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479284, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672479295, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672479300, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672479313, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672479316, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479316, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672479318, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672479338, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672479355, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479355, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672479363, "dur": 72, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479363, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672479436, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479436, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672479514, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479514, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672479522, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672479555, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479555, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672479574, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479574, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672479815, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672479827, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672479843, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672479848, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672479900, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672479905, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479905, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672479911, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672479914, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479914, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672479917, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479917, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672479925, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672479942, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479942, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672479945, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479945, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672479963, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672479966, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672479984, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479984, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672479988, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672479988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672480000, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480000, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672480004, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672480018, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480018, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672480020, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672480022, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480022, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672480041, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672480059, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480059, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672480067, "dur": 83, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672480150, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672480227, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672480235, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480235, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672480275, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672480295, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672480539, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672480550, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672480566, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480566, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672480571, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480571, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672480623, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480623, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672480627, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672480634, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480634, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672480637, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480637, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672480639, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672480647, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480647, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672480665, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480665, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672480668, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672480687, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672480690, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672480709, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480709, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672480713, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480713, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672480723, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672480728, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672480742, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480742, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672480745, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672480747, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480747, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672480766, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672480783, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672480791, "dur": 68, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480791, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672480860, "dur": 75, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672480936, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480936, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672480944, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672480976, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480976, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672480997, "dur": 240, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672480997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672481239, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481239, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672481250, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672481266, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672481272, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481272, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672481323, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672481327, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481327, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672481334, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481334, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672481337, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672481340, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481340, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672481347, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672481365, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672481369, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672481387, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672481390, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672481409, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672481413, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481413, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672481423, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481423, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672481428, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672481442, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672481444, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672481446, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481446, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672481466, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481466, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672481483, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481483, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672481491, "dur": 82, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672481573, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672481653, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672481661, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672481702, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481702, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672481723, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672481965, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481965, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672481977, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481977, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672481993, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481993, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672481998, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672481998, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672482049, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672482053, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672482060, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482060, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672482063, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482063, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672482066, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672482074, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482074, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672482091, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672482095, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672482113, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482113, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482116, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672482134, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482134, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482138, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482138, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672482149, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482154, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672482167, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672482170, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482170, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482172, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482172, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672482191, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672482208, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672482216, "dur": 67, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482216, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482284, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482284, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672482365, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482373, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482373, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672482414, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672482434, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482434, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672482678, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672482690, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672482706, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482706, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482712, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672482762, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482762, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672482766, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672482773, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672482776, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482776, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672482779, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672482786, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672482805, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482805, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672482808, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672482826, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482829, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672482848, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482852, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482852, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672482862, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482862, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482867, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672482881, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482881, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672482884, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482886, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482886, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672482906, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482906, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672482923, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672482931, "dur": 61, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672482992, "dur": 74, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672482992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672483067, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672483074, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483074, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672483108, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483108, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672483127, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483127, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672483370, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672483381, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672483397, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483397, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672483402, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672483453, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483453, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672483457, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483457, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672483464, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672483467, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672483470, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672483477, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483477, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672483495, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672483498, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672483517, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483517, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672483520, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672483538, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672483542, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483542, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672483552, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483552, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672483557, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672483571, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483571, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672483573, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672483575, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672483595, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672483613, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483613, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672483621, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672483701, "dur": 75, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672483777, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672483784, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672483817, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672483839, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672483839, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672484082, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672484094, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672484110, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484110, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672484116, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672484166, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672484170, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484170, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672484177, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672484180, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672484183, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672484190, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484190, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672484209, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484209, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672484212, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672484232, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672484235, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484235, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672484253, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672484257, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672484267, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672484272, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484272, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672484286, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484286, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672484288, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484288, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672484290, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672484311, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484311, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672484328, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672484336, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484336, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672484417, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672484497, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484497, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672484505, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672484545, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484545, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672484566, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484566, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672484808, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672484820, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672484836, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672484841, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672484892, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672484896, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672484902, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484902, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672484906, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484906, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672484908, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484908, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672484916, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484916, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672484934, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484934, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672484937, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484937, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672484955, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672484958, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672484976, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484976, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672484980, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672484990, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672484995, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672484995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672485009, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672485011, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485011, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672485013, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672485033, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672485050, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672485059, "dur": 73, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485059, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672485132, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485132, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672485213, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485213, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672485221, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485221, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672485262, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672485284, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485284, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672485528, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672485539, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672485555, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485555, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672485560, "dur": 52, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672485613, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485613, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672485617, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672485624, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672485627, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672485629, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485629, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672485637, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485637, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672485655, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485655, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672485658, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485658, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672485677, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672485680, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672485699, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672485703, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485703, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672485713, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485713, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672485718, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672485731, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672485734, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672485736, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485736, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672485755, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672485772, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485772, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672485781, "dur": 63, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485781, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672485844, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672485923, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672485931, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672485972, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485972, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672485993, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672485993, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672486234, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672486246, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486246, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672486262, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672486267, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672486319, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486319, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672486323, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672486330, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486330, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672486333, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486333, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672486336, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486336, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672486343, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486343, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672486361, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672486365, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672486383, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672486386, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486386, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672486404, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486404, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672486408, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672486419, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486419, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672486423, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486423, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672486437, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486437, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672486439, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486439, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672486441, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486441, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672486461, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672486478, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672486487, "dur": 63, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486487, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672486550, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672486631, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486631, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672486640, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672486680, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672486702, "dur": 244, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486702, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672486947, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486947, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672486958, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672486974, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672486980, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672486980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672487031, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672487035, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487035, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672487042, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672487045, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487045, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672487048, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672487056, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487056, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672487073, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487073, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672487077, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672487095, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487098, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487098, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672487117, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487117, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487121, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487121, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672487132, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487132, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487136, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672487150, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672487152, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487154, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672487174, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487174, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672487191, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672487199, "dur": 61, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487260, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672487338, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487345, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487345, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672487378, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672487399, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487399, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672487643, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487643, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672487654, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672487670, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487670, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487675, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487675, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672487727, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672487731, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672487737, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672487740, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672487743, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672487751, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487751, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672487768, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672487772, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487772, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672487790, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487790, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487793, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672487812, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487816, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487816, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672487826, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487831, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487831, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672487845, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672487847, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487849, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672487869, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672487886, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487886, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672487895, "dur": 75, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487895, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672487970, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672487970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672488050, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672488058, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672488099, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672488121, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488121, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672488363, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488363, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672488375, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488375, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672488391, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672488396, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488396, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672488448, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672488452, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672488459, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488459, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672488462, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488462, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672488464, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672488472, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488472, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672488490, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672488494, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488494, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672488512, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488512, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672488515, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672488533, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488533, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672488537, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672488548, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672488553, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488553, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672488566, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488566, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672488569, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672488571, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488571, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672488591, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488591, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672488608, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488608, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672488616, "dur": 65, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672488681, "dur": 75, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488681, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672488757, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672488764, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672488797, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672488817, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672488817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672489060, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489060, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672489072, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489072, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672489088, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489088, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489093, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672489145, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672489149, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672489155, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672489158, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672489161, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489161, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672489168, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672489186, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489186, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672489190, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489190, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672489208, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489211, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489211, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672489229, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489229, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489233, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489233, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672489243, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489248, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489248, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672489261, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672489264, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489264, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489266, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672489286, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489286, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672489302, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489302, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672489311, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489311, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489388, "dur": 81, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672489470, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489479, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672489519, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489519, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672489540, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489540, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672489784, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672489796, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489796, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672489812, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489818, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672489868, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672489872, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489872, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672489879, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672489882, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672489884, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672489892, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672489910, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672489914, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489914, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672489932, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489935, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672489953, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489953, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489957, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672489968, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489973, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489973, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672489986, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672489989, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672489991, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672489991, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672490011, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490011, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672490027, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672490036, "dur": 69, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672490105, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672490184, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490184, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672490191, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672490224, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672490245, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490245, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672490489, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672490501, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490501, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672490517, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490517, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672490523, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672490574, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490574, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672490578, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672490584, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672490587, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490587, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672490590, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490590, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672490598, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490598, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672490616, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672490619, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672490637, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490637, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672490640, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672490659, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672490663, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672490673, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672490678, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672490692, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672490694, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672490696, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672490717, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672490735, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672490743, "dur": 68, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672490812, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672490889, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672490896, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672490929, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672490949, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672490949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672491191, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672491203, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491203, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672491219, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491219, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672491225, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672491275, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672491280, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672491286, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491286, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672491290, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672491292, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491292, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672491300, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672491318, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672491321, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672491341, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672491344, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491344, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672491362, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491362, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672491366, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491366, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672491376, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672491381, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672491395, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491395, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672491397, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491397, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672491399, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491399, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672491420, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491420, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672491437, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491437, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672491445, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672491525, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491525, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672491603, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491603, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672491611, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491611, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672491651, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672491671, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672491914, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491914, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672491925, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672491941, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491941, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672491947, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491947, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672491998, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672491998, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672492002, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672492009, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672492012, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492012, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672492015, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672492022, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492022, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672492039, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672492043, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492043, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672492061, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492064, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492064, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672492082, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492086, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672492097, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492101, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672492115, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672492118, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492118, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492120, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492120, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672492140, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672492157, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492157, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672492165, "dur": 67, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492165, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492233, "dur": 81, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492233, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672492315, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492324, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492324, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672492364, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492364, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672492385, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492385, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672492627, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672492639, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672492655, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492655, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492660, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672492710, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672492715, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672492721, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492721, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672492724, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492724, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672492727, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672492735, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672492752, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672492756, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672492773, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492777, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672492795, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492799, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492799, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672492809, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492814, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492814, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672492827, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672492830, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492830, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492832, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492832, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672492851, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492851, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672492868, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672492876, "dur": 67, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492876, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672492944, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672492944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672493021, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493021, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672493028, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493028, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672493061, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672493083, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672493327, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493327, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672493338, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672493355, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493355, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672493360, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672493410, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493410, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672493414, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672493421, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493421, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672493424, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672493426, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493426, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672493434, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493434, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672493452, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672493455, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672493473, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493473, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672493476, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672493494, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493494, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672493498, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672493508, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493508, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672493513, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493513, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672493526, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672493529, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493529, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672493531, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672493550, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672493567, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672493575, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672493654, "dur": 81, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672493736, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493736, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672493744, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672493785, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493785, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672493807, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672493807, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672494050, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672494062, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672494077, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494083, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672494134, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494134, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672494138, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494138, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672494145, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672494148, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494148, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672494151, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494151, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672494158, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672494176, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672494180, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672494197, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494197, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494201, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494201, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672494220, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494224, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672494234, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494239, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494239, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672494252, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494252, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672494255, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494257, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672494277, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672494294, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672494302, "dur": 58, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494302, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494361, "dur": 73, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672494435, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494435, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494442, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672494475, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494475, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672494496, "dur": 244, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672494740, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672494752, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672494768, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494773, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672494825, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494825, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672494829, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672494836, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672494839, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494839, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672494842, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672494849, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672494867, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672494870, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672494888, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494891, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672494910, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494914, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494914, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672494925, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494930, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672494944, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672494947, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494947, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672494948, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672494968, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672494985, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494985, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672494993, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672494993, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672495071, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672495149, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672495157, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495157, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672495189, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672495210, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672495451, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495451, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672495463, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672495479, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672495484, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495484, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672495535, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672495540, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495540, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672495546, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672495549, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672495552, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495552, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672495560, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672495577, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495577, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672495581, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672495599, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495599, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672495603, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495603, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672495622, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672495626, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495626, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672495636, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672495641, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672495655, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495655, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672495657, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672495659, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672495679, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672495696, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672495704, "dur": 82, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672495786, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672495863, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672495870, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672495903, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495903, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672495924, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672495924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672496167, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672496178, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496178, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672496194, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672496200, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672496250, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672496255, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672496261, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672496264, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496264, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672496267, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672496274, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672496292, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496292, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672496295, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672496314, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672496317, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496317, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672496335, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496335, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672496339, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496339, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672496350, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672496354, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496354, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672496368, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672496370, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672496372, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672496392, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672496409, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672496417, "dur": 73, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672496491, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672496569, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672496576, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672496610, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496610, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672496630, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496630, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672496873, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672496884, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672496901, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672496906, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496906, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672496957, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672496961, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672496968, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672496971, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496971, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672496974, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672496981, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496981, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672496999, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672496999, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672497002, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672497021, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497021, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497024, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497024, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672497042, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497046, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497046, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672497057, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497057, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497062, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672497076, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672497078, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497080, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497080, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672497100, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497100, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672497116, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672497126, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497202, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497202, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672497279, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497279, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497287, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672497320, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497320, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672497339, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497339, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672497583, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672497594, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672497610, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497610, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497615, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497615, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672497666, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672497671, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672497677, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672497680, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672497683, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497683, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672497691, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672497708, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497708, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672497711, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497711, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672497729, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497732, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672497750, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497754, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497754, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672497765, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497765, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497770, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672497783, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672497786, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497788, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672497808, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672497825, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497825, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672497833, "dur": 82, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672497916, "dur": 76, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497916, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672497993, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672497993, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672498000, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498000, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672498033, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672498052, "dur": 240, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498052, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672498294, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672498305, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498305, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672498321, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672498327, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498327, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672498377, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672498381, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672498388, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672498391, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672498394, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672498401, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498401, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672498419, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498419, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672498422, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672498440, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672498444, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672498462, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498462, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672498466, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498466, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672498476, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672498481, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672498495, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672498498, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672498500, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672498519, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498519, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672498537, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672498546, "dur": 82, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672498628, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498628, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672498708, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498708, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672498717, "dur": 40, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672498757, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672498779, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672498779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672499021, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499021, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672499033, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672499049, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499054, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672499104, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499104, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672499109, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672499115, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672499118, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499118, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672499121, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499121, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672499129, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499129, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672499146, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672499150, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672499168, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499171, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672499189, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499193, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672499203, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499203, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499208, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672499221, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499221, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672499224, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499226, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499226, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672499245, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499245, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672499262, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672499271, "dur": 69, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499340, "dur": 77, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499340, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672499419, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499419, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499427, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672499467, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672499488, "dur": 243, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499488, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672499732, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672499743, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672499760, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499760, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499765, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499765, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672499815, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672499819, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499819, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672499826, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672499829, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672499832, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499832, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672499839, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499839, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672499857, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499857, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672499861, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499861, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672499879, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499882, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672499901, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499905, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499905, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672499915, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499915, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499920, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499920, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672499934, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499934, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672499936, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499936, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672499939, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499939, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672499958, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672499975, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499975, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672499984, "dur": 62, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672499984, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672500046, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500046, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672500126, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672500134, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500134, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672500174, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500174, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672500194, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672500437, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500437, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672500448, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672500465, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672500470, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672500520, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672500524, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500524, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672500531, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672500534, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500534, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672500537, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672500544, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500544, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672500562, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672500565, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672500584, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672500587, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500587, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672500605, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672500609, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672500620, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500620, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672500625, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672500639, "dur": 1, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672500641, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672500643, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500643, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672500663, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672500680, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672500688, "dur": 71, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672500760, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500760, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672500841, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672500849, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672500890, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500890, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672500911, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672500911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672501154, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672501166, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672501182, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501182, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672501187, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672501239, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501239, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672501243, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672501249, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501249, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672501252, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501252, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672501255, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672501263, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672501280, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672501284, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501284, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672501302, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501302, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672501305, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501305, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672501323, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672501327, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501327, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672501337, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672501342, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501342, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672501356, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672501359, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501359, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672501361, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672501381, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672501397, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501397, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672501406, "dur": 63, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672501469, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672501549, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672501557, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672501597, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672501618, "dur": 242, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501618, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672501861, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501861, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672501872, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501872, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672501888, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672501894, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501894, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672501945, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501945, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672501949, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672501955, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672501959, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501959, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672501961, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672501969, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672501986, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672501990, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672501990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672502008, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502011, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502011, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672502030, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502034, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502034, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672502045, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502045, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502049, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672502063, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502063, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672502065, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502065, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502067, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672502087, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672502104, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502104, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672502113, "dur": 65, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502113, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502178, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502178, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672502259, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502259, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502267, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672502307, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502307, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672502328, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672502570, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672502582, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672502598, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502598, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502604, "dur": 49, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672502654, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672502658, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502658, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672502664, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502664, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672502667, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672502670, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502670, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672502678, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672502696, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672502699, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672502717, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502721, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502721, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672502739, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502739, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502744, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672502754, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502754, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502759, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672502772, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502772, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672502775, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502777, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672502796, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502796, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672502813, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672502821, "dur": 64, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502885, "dur": 80, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502885, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672502966, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672502974, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672502974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672503015, "dur": 20, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672503036, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672503278, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503278, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672503290, "dur": 15, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672503306, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503306, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672503311, "dur": 50, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503311, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672503361, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672503366, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503366, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672503372, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672503375, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503375, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672503378, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672503385, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503385, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672503403, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503403, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672503406, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672503424, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672503427, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672503446, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503446, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672503450, "dur": 10, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672503460, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503460, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672503465, "dur": 13, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672503479, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672503481, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672503483, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503483, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672503503, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503503, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672503520, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672503528, "dur": 62, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672503591, "dur": 78, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503591, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672503671, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672503679, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672503719, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672503739, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503739, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672503981, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503981, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672503993, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672503993, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672504009, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672504015, "dur": 52, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672504067, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672504071, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672504078, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672504081, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504081, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672504084, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672504091, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672504109, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672504112, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672504130, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672504133, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672504151, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504151, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672504155, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672504166, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 118, "shared memory": 4096, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672504171, "dur": 12, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672504184, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504184, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672504187, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 2.909091, "warps per SM": 11.636364, "grid": [384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 18}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 96u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672504189, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 199872, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672504208, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [4, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672504225, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 256, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672504234, "dur": 68, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672504303, "dur": 79, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672504383, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672504392, "dur": 39, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672504432, "dur": 21, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672504454, "dur": 241, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504454, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672504696, "dur": 11, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 48, "shared memory": 256, "blocks per SM": 124.121216, "warps per SM": 496.48486, "grid": [16384, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672504708, "dur": 16, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [1, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504708, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672504725, "dur": 5, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 31.030304, "warps per SM": 124.121216, "grid": [4096, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504725, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672504730, "dur": 51, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 159840, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504730, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672504782, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 84, "shared memory": 16, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504782, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672504786, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.57575756, "warps per SM": 2.3030303, "grid": [4, 1, 19], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672504792, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 7.757576, "grid": [8, 8, 1], "block": [32, 16, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672504795, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672504798, "dur": 6, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 0.969697, "warps per SM": 0.969697, "grid": [128, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504798, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672504806, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504806, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672504823, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504823, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672504826, "dur": 18, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672504845, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 30, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672504848, "dur": 17, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672504866, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 56u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672504871, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 158944, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672504881, "dur": 134, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672504881, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672505015, "dur": 73, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672505089, "dur": 7, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505089, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672505097, "dur": 32, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672505130, "dur": 19, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672505150, "dur": 356, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672505507, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672505510, "dur": 3, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672505514, "dur": 2, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 24, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505514, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::CatArrayBatchedCopy<c10::BFloat16, unsigned int, 2, 128, 1>(c10::BFloat16*, at::native::(anonymous namespace)::CatArrInputTensorMetadata<c10::BFloat16, unsigned int, 128, 1>, at::native::(anonymous namespace)::TensorSizeStride<unsigned int, 4u>, int, unsigned int)", "pid": 0, "tid": 7, "ts": 1742522672505518, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 21, "shared memory": 0, "blocks per SM": 4, "warps per SM": 64, "grid": [264, 2, 1], "block": [512, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "memcpy32_post", "pid": 0, "tid": 7, "ts": 1742522672505522, "dur": 1, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.060606062, "grid": [1, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672505524, "dur": 1, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.015151516, "warps per SM": 0.030303031, "grid": [2, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505524, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "memcpy32_post", "pid": 0, "tid": 7, "ts": 1742522672505526, "dur": 1, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.060606062, "grid": [1, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "memcpy32_post", "pid": 0, "tid": 7, "ts": 1742522672505527, "dur": 1, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.060606062, "grid": [1, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::radixSortKVInPlace<-2, -1, 32, 4, long, long, unsigned int>(at::cuda::detail::TensorInfo<long, unsigned int>, unsigned int, unsigned int, unsigned int, at::cuda::detail::TensorInfo<long, unsigned int>, unsigned int, bool)", "pid": 0, "tid": 7, "ts": 1742522672505529, "dur": 8, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 54, "shared memory": 1184, "blocks per SM": 0.007575758, "warps per SM": 0.007575758, "grid": [1, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505529, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::index_elementwise_kernel<128, 4, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1}>(long, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672505538, "dur": 9, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 40, "shared memory": 0, "blocks per SM": 27.151516, "warps per SM": 108.606064, "grid": [3584, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 75}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "memcpy128", "pid": 0, "tid": 7, "ts": 1742522672505548, "dur": 4, "args": {"External id": 7077, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7077, "registers per thread": 16, "shared memory": 0, "blocks per SM": 6.787879, "warps per SM": 54.303032, "grid": [896, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 85}}, {"ph": "f", "id": 7077, "pid": 0, "tid": 7, "ts": 1742522672505548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaGraphLaunch", "pid": 494, "tid": 494, "ts": 1742522672417026, "dur": 4071, "args": {"External id": 7077, "cbid": 311, "correlation": 7077}}, {"ph": "s", "id": 7077, "pid": 494, "tid": 494, "ts": 1742522672417026, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaDriverGetVersion", "pid": 494, "tid": 494, "ts": 1742522672421099, "dur": 0, "args": {"External id": 7078, "cbid": 1, "correlation": 7078}}, {"ph": "f", "id": 7078, "pid": 494, "tid": 494, "ts": 1742522672421099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672505560, "dur": 3, "args": {"External id": 7103, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7103, "registers per thread": 84, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7103, "pid": 0, "tid": 7, "ts": 1742522672505560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_driver", "name": "cuLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672421382, "dur": 34, "args": {"External id": 7103, "cbid": 307, "correlation": 7103}}, {"ph": "s", "id": 7103, "pid": 494, "tid": 494, "ts": 1742522672421382, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::indexSelectLargeIndex<c10::BFloat16, long, unsigned int, 2, 2, -2, true>(at::cuda::detail::TensorInfo<c10::BFloat16, unsigned int>, at::cuda::detail::TensorInfo<c10::BFloat16, unsigned int>, at::cuda::detail::TensorInfo<long, unsigned int>, int, int, unsigned int, unsigned int, long)", "pid": 0, "tid": 7, "ts": 1742522672505566, "dur": 12, "args": {"External id": 7121, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7121, "registers per thread": 32, "shared memory": 0, "blocks per SM": 8, "warps per SM": 32, "grid": [1056, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 50}}, {"ph": "f", "id": 7121, "pid": 0, "tid": 7, "ts": 1742522672505566, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672421512, "dur": 8, "args": {"External id": 7121, "cbid": 211, "correlation": 7121}}, {"ph": "s", "id": 7121, "pid": 494, "tid": 494, "ts": 1742522672421512, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncGetAttributes", "pid": 494, "tid": 494, "ts": 1742522672421589, "dur": 7, "args": {"External id": 7134, "cbid": 15, "correlation": 7134}}, {"ph": "f", "id": 7134, "pid": 494, "tid": 494, "ts": 1742522672421589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memset", "name": "Memset (Device)", "pid": 0, "tid": 7, "ts": 1742522672505581, "dur": 0, "args": {"External id": 7135, "device": 0, "context": 1, "stream": 7, "correlation": 7135, "bytes": 4, "memory bandwidth (GB/s)": 0.004032258064516129}}, {"ph": "f", "id": 7135, "pid": 0, "tid": 7, "ts": 1742522672505581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemsetAsync", "pid": 494, "tid": 494, "ts": 1742522672421615, "dur": 14, "args": {"External id": 7135, "cbid": 51, "correlation": 7135}}, {"ph": "s", "id": 7135, "pid": 494, "tid": 494, "ts": 1742522672421615, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672421632, "dur": 2, "args": {"External id": 7136, "cbid": 273, "correlation": 7136}}, {"ph": "f", "id": 7136, "pid": 494, "tid": 494, "ts": 1742522672421632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672421634, "dur": 0, "args": {"External id": 7137, "cbid": 273, "correlation": 7137}}, {"ph": "f", "id": 7137, "pid": 494, "tid": 494, "ts": 1742522672421634, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672421635, "dur": 0, "args": {"External id": 7138, "cbid": 273, "correlation": 7138}}, {"ph": "f", "id": 7138, "pid": 494, "tid": 494, "ts": 1742522672421635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "sm90_xmma_gemm_bf16f32_bf16f32_f32_tn_n_tilesize128x128x64_warpgroupsize1x1x1_execute_segment_k_off_kernel__5x_cublas", "pid": 0, "tid": 7, "ts": 1742522672505584, "dur": 756, "args": {"External id": 7139, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7139, "registers per thread": 168, "shared memory": 231424, "blocks per SM": 0.90909094, "warps per SM": 10.909091, "grid": [60, 2, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7139, "pid": 0, "tid": 7, "ts": 1742522672505584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672421636, "dur": 7, "args": {"External id": 7139, "cbid": 430, "correlation": 7139}}, {"ph": "s", "id": 7139, "pid": 494, "tid": 494, "ts": 1742522672421636, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421827, "dur": 3, "args": {"External id": 7144, "cbid": 138, "correlation": 7144}}, {"ph": "f", "id": 7144, "pid": 494, "tid": 494, "ts": 1742522672421827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421832, "dur": 1, "args": {"External id": 7145, "cbid": 138, "correlation": 7145}}, {"ph": "f", "id": 7145, "pid": 494, "tid": 494, "ts": 1742522672421832, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421835, "dur": 1, "args": {"External id": 7146, "cbid": 138, "correlation": 7146}}, {"ph": "f", "id": 7146, "pid": 494, "tid": 494, "ts": 1742522672421835, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421837, "dur": 1, "args": {"External id": 7147, "cbid": 138, "correlation": 7147}}, {"ph": "f", "id": 7147, "pid": 494, "tid": 494, "ts": 1742522672421837, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421839, "dur": 1, "args": {"External id": 7148, "cbid": 138, "correlation": 7148}}, {"ph": "f", "id": 7148, "pid": 494, "tid": 494, "ts": 1742522672421839, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421841, "dur": 1, "args": {"External id": 7149, "cbid": 138, "correlation": 7149}}, {"ph": "f", "id": 7149, "pid": 494, "tid": 494, "ts": 1742522672421841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421843, "dur": 1, "args": {"External id": 7150, "cbid": 138, "correlation": 7150}}, {"ph": "f", "id": 7150, "pid": 494, "tid": 494, "ts": 1742522672421843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421845, "dur": 1, "args": {"External id": 7151, "cbid": 138, "correlation": 7151}}, {"ph": "f", "id": 7151, "pid": 494, "tid": 494, "ts": 1742522672421845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421847, "dur": 1, "args": {"External id": 7152, "cbid": 138, "correlation": 7152}}, {"ph": "f", "id": 7152, "pid": 494, "tid": 494, "ts": 1742522672421847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421848, "dur": 1, "args": {"External id": 7153, "cbid": 138, "correlation": 7153}}, {"ph": "f", "id": 7153, "pid": 494, "tid": 494, "ts": 1742522672421848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421850, "dur": 1, "args": {"External id": 7154, "cbid": 138, "correlation": 7154}}, {"ph": "f", "id": 7154, "pid": 494, "tid": 494, "ts": 1742522672421850, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421852, "dur": 1, "args": {"External id": 7155, "cbid": 138, "correlation": 7155}}, {"ph": "f", "id": 7155, "pid": 494, "tid": 494, "ts": 1742522672421852, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421854, "dur": 1, "args": {"External id": 7156, "cbid": 138, "correlation": 7156}}, {"ph": "f", "id": 7156, "pid": 494, "tid": 494, "ts": 1742522672421854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421856, "dur": 1, "args": {"External id": 7157, "cbid": 138, "correlation": 7157}}, {"ph": "f", "id": 7157, "pid": 494, "tid": 494, "ts": 1742522672421856, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421858, "dur": 1, "args": {"External id": 7158, "cbid": 138, "correlation": 7158}}, {"ph": "f", "id": 7158, "pid": 494, "tid": 494, "ts": 1742522672421858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421860, "dur": 1, "args": {"External id": 7159, "cbid": 138, "correlation": 7159}}, {"ph": "f", "id": 7159, "pid": 494, "tid": 494, "ts": 1742522672421860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421862, "dur": 1, "args": {"External id": 7160, "cbid": 138, "correlation": 7160}}, {"ph": "f", "id": 7160, "pid": 494, "tid": 494, "ts": 1742522672421862, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421863, "dur": 1, "args": {"External id": 7161, "cbid": 138, "correlation": 7161}}, {"ph": "f", "id": 7161, "pid": 494, "tid": 494, "ts": 1742522672421863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672421865, "dur": 1, "args": {"External id": 7162, "cbid": 138, "correlation": 7162}}, {"ph": "f", "id": 7162, "pid": 494, "tid": 494, "ts": 1742522672421865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pinned -> Device)", "pid": 0, "tid": 7, "ts": 1742522672506344, "dur": 2, "args": {"External id": 7172, "device": 0, "context": 1, "stream": 7, "correlation": 7172, "bytes": 512, "memory bandwidth (GB/s)": 0.2}}, {"ph": "f", "id": 7172, "pid": 0, "tid": 7, "ts": 1742522672506344, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672421953, "dur": 11, "args": {"External id": 7172, "cbid": 41, "correlation": 7172}}, {"ph": "s", "id": 7172, "pid": 494, "tid": 494, "ts": 1742522672421953, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pinned -> Device)", "pid": 0, "tid": 7, "ts": 1742522672506349, "dur": 2, "args": {"External id": 7184, "device": 0, "context": 1, "stream": 7, "correlation": 7184, "bytes": 512, "memory bandwidth (GB/s)": 0.17777777777777778}}, {"ph": "f", "id": 7184, "pid": 0, "tid": 7, "ts": 1742522672506349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672421976, "dur": 4, "args": {"External id": 7184, "cbid": 41, "correlation": 7184}}, {"ph": "s", "id": 7184, "pid": 494, "tid": 494, "ts": 1742522672421976, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pinned -> Device)", "pid": 0, "tid": 7, "ts": 1742522672506355, "dur": 2, "args": {"External id": 7196, "device": 0, "context": 1, "stream": 7, "correlation": 7196, "bytes": 512, "memory bandwidth (GB/s)": 0.20253164556962025}}, {"ph": "f", "id": 7196, "pid": 0, "tid": 7, "ts": 1742522672506355, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672421988, "dur": 3, "args": {"External id": 7196, "cbid": 41, "correlation": 7196}}, {"ph": "s", "id": 7196, "pid": 494, "tid": 494, "ts": 1742522672421988, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pinned -> Device)", "pid": 0, "tid": 7, "ts": 1742522672506360, "dur": 2, "args": {"External id": 7208, "device": 0, "context": 1, "stream": 7, "correlation": 7208, "bytes": 512, "memory bandwidth (GB/s)": 0.1839080459770115}}, {"ph": "f", "id": 7208, "pid": 0, "tid": 7, "ts": 1742522672506360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672422000, "dur": 3, "args": {"External id": 7208, "cbid": 41, "correlation": 7208}}, {"ph": "s", "id": 7208, "pid": 494, "tid": 494, "ts": 1742522672422000, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pinned -> Device)", "pid": 0, "tid": 7, "ts": 1742522672506365, "dur": 2, "args": {"External id": 7220, "device": 0, "context": 1, "stream": 7, "correlation": 7220, "bytes": 512, "memory bandwidth (GB/s)": 0.1797752808988764}}, {"ph": "f", "id": 7220, "pid": 0, "tid": 7, "ts": 1742522672506365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672422010, "dur": 3, "args": {"External id": 7220, "cbid": 41, "correlation": 7220}}, {"ph": "s", "id": 7220, "pid": 494, "tid": 494, "ts": 1742522672422010, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pinned -> Device)", "pid": 0, "tid": 7, "ts": 1742522672506371, "dur": 3, "args": {"External id": 7232, "device": 0, "context": 1, "stream": 7, "correlation": 7232, "bytes": 512, "memory bandwidth (GB/s)": 0.16494845360824742}}, {"ph": "f", "id": 7232, "pid": 0, "tid": 7, "ts": 1742522672506371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672422022, "dur": 3, "args": {"External id": 7232, "cbid": 41, "correlation": 7232}}, {"ph": "s", "id": 7232, "pid": 494, "tid": 494, "ts": 1742522672422022, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::CatArrayBatchedCopy_aligned16_contig<float, unsigned int, 2, 128, 1>(float*, at::native::(anonymous namespace)::CatArrInputTensorMetadata<float, unsigned int, 128, 1>, at::native::(anonymous namespace)::TensorSizeStride<unsigned int, 4u>, int, unsigned int)", "pid": 0, "tid": 7, "ts": 1742522672506377, "dur": 4, "args": {"External id": 7272, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7272, "registers per thread": 32, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [8, 128, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7272, "pid": 0, "tid": 7, "ts": 1742522672506377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422421, "dur": 12, "args": {"External id": 7272, "cbid": 211, "correlation": 7272}}, {"ph": "s", "id": 7272, "pid": 494, "tid": 494, "ts": 1742522672422421, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::CatArrayBatchedCopy_aligned16_contig<long, unsigned int, 2, 128, 1>(long*, at::native::(anonymous namespace)::CatArrInputTensorMetadata<long, unsigned int, 128, 1>, at::native::(anonymous namespace)::TensorSizeStride<unsigned int, 4u>, int, unsigned int)", "pid": 0, "tid": 7, "ts": 1742522672506384, "dur": 5, "args": {"External id": 7283, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7283, "registers per thread": 26, "shared memory": 0, "blocks per SM": 15.515152, "warps per SM": 62.060608, "grid": [16, 128, 1], "block": [128, 1, 1], "est. achieved occupancy %": 97}}, {"ph": "f", "id": 7283, "pid": 0, "tid": 7, "ts": 1742522672506384, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422474, "dur": 6, "args": {"External id": 7283, "cbid": 211, "correlation": 7283}}, {"ph": "s", "id": 7283, "pid": 494, "tid": 494, "ts": 1742522672422474, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void vllm::apply_penalty_kernel<float, true, 256>(float*, float const*, float const*, float const*, long const*, int**, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672506391, "dur": 70, "args": {"External id": 7303, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7303, "registers per thread": 168, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7303, "pid": 0, "tid": 7, "ts": 1742522672506391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422514, "dur": 5, "args": {"External id": 7303, "cbid": 211, "correlation": 7303}}, {"ph": "s", "id": 7303, "pid": 494, "tid": 494, "ts": 1742522672422514, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672422535, "dur": 1, "args": {"External id": 7322, "cbid": 273, "correlation": 7322}}, {"ph": "f", "id": 7322, "pid": 494, "tid": 494, "ts": 1742522672422535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::topk_kernel<float, unsigned int, 2>(at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, unsigned int, bool, bool, unsigned int, at::cuda::detail::TensorInfo<float, unsigned int>, at::cuda::detail::TensorInfo<long, unsigned int>)", "pid": 0, "tid": 7, "ts": 1742522672506463, "dur": 155, "args": {"External id": 7323, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7323, "registers per thread": 64, "shared memory": 113216, "blocks per SM": 0.969697, "warps per SM": 31.030304, "grid": [128, 1, 1], "block": [1024, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7323, "pid": 0, "tid": 7, "ts": 1742522672506463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422538, "dur": 5, "args": {"External id": 7323, "cbid": 211, "correlation": 7323}}, {"ph": "s", "id": 7323, "pid": 494, "tid": 494, "ts": 1742522672422538, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672506622, "dur": 2, "args": {"External id": 7334, "device": 0, "context": 1, "stream": 7, "correlation": 7334, "bytes": 2097152, "memory bandwidth (GB/s)": 809.0864197530864}}, {"ph": "f", "id": 7334, "pid": 0, "tid": 7, "ts": 1742522672506622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672422562, "dur": 11, "args": {"External id": 7334, "cbid": 41, "correlation": 7334}}, {"ph": "s", "id": 7334, "pid": 494, "tid": 494, "ts": 1742522672422562, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672506627, "dur": 1, "args": {"External id": 7352, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7352, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 0.969697, "grid": [64, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7352, "pid": 0, "tid": 7, "ts": 1742522672506627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422596, "dur": 4, "args": {"External id": 7352, "cbid": 211, "correlation": 7352}}, {"ph": "s", "id": 7352, "pid": 494, "tid": 494, "ts": 1742522672422596, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506630, "dur": 3, "args": {"External id": 7358, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7358, "registers per thread": 18, "shared memory": 0, "blocks per SM": 15.515152, "warps per SM": 62.060608, "grid": [2048, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 97}}, {"ph": "f", "id": 7358, "pid": 0, "tid": 7, "ts": 1742522672506630, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422610, "dur": 4, "args": {"External id": 7358, "cbid": 211, "correlation": 7358}}, {"ph": "s", "id": 7358, "pid": 494, "tid": 494, "ts": 1742522672422610, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672506636, "dur": 2, "args": {"External id": 7364, "device": 0, "context": 1, "stream": 7, "correlation": 7364, "bytes": 2097152, "memory bandwidth (GB/s)": 923.0422535211268}}, {"ph": "f", "id": 7364, "pid": 0, "tid": 7, "ts": 1742522672506636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672422618, "dur": 4, "args": {"External id": 7364, "cbid": 41, "correlation": 7364}}, {"ph": "s", "id": 7364, "pid": 494, "tid": 494, "ts": 1742522672422618, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::radixSortKVInPlace<2, -1, 128, 32, float, long, unsigned int>(at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, unsigned int, unsigned int, at::cuda::detail::TensorInfo<long, unsigned int>, unsigned int, bool)", "pid": 0, "tid": 7, "ts": 1742522672506641, "dur": 24, "args": {"External id": 7369, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7369, "registers per thread": 217, "shared memory": 33808, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 7369, "pid": 0, "tid": 7, "ts": 1742522672506641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422631, "dur": 5, "args": {"External id": 7369, "cbid": 211, "correlation": 7369}}, {"ph": "s", "id": 7369, "pid": 494, "tid": 494, "ts": 1742522672422631, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void vllm::mask_top_p_kernel<4, float, float, 256>(float*, float const*, int, int)", "pid": 0, "tid": 7, "ts": 1742522672506668, "dur": 8, "args": {"External id": 7375, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7375, "registers per thread": 32, "shared memory": 1216, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 7375, "pid": 0, "tid": 7, "ts": 1742522672506668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422645, "dur": 4, "args": {"External id": 7375, "cbid": 211, "correlation": 7375}}, {"ph": "s", "id": 7375, "pid": 494, "tid": 494, "ts": 1742522672422645, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672506679, "dur": 1, "args": {"External id": 7393, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7393, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 0.969697, "grid": [64, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7393, "pid": 0, "tid": 7, "ts": 1742522672506679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422663, "dur": 3, "args": {"External id": 7393, "cbid": 211, "correlation": 7393}}, {"ph": "s", "id": 7393, "pid": 494, "tid": 494, "ts": 1742522672422663, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<true, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<true, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506682, "dur": 10, "args": {"External id": 7406, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7406, "registers per thread": 32, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7406, "pid": 0, "tid": 7, "ts": 1742522672506682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422701, "dur": 5, "args": {"External id": 7406, "cbid": 211, "correlation": 7406}}, {"ph": "s", "id": 7406, "pid": 494, "tid": 494, "ts": 1742522672422701, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506695, "dur": 5, "args": {"External id": 7416, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7416, "registers per thread": 32, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7416, "pid": 0, "tid": 7, "ts": 1742522672506695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422725, "dur": 4, "args": {"External id": 7416, "cbid": 211, "correlation": 7416}}, {"ph": "s", "id": 7416, "pid": 494, "tid": 494, "ts": 1742522672422725, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::cunn_SoftMaxForward<4, float, float, float, at::native::(anonymous namespace)::SoftMaxForwardEpilogue>(float*, float const*, int)", "pid": 0, "tid": 7, "ts": 1742522672506704, "dur": 5, "args": {"External id": 7427, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7427, "registers per thread": 30, "shared memory": 2048, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 7427, "pid": 0, "tid": 7, "ts": 1742522672506704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422751, "dur": 4, "args": {"External id": 7427, "cbid": 211, "correlation": 7427}}, {"ph": "s", "id": 7427, "pid": 494, "tid": 494, "ts": 1742522672422751, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::log_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2> >(int, at::native::log_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672506712, "dur": 2, "args": {"External id": 7437, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7437, "registers per thread": 22, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7437, "pid": 0, "tid": 7, "ts": 1742522672506712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422770, "dur": 5, "args": {"External id": 7437, "cbid": 211, "correlation": 7437}}, {"ph": "s", "id": 7437, "pid": 494, "tid": 494, "ts": 1742522672422770, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<float>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<float>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672506716, "dur": 21, "args": {"External id": 7450, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7450, "registers per thread": 16, "shared memory": 0, "blocks per SM": 244.84848, "warps per SM": 979.3939, "grid": [32320, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7450, "pid": 0, "tid": 7, "ts": 1742522672506716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422797, "dur": 4, "args": {"External id": 7450, "cbid": 211, "correlation": 7450}}, {"ph": "s", "id": 7450, "pid": 494, "tid": 494, "ts": 1742522672422797, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<true, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<true, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506740, "dur": 22, "args": {"External id": 7456, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7456, "registers per thread": 32, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7456, "pid": 0, "tid": 7, "ts": 1742522672506740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422810, "dur": 4, "args": {"External id": 7456, "cbid": 211, "correlation": 7456}}, {"ph": "s", "id": 7456, "pid": 494, "tid": 494, "ts": 1742522672422810, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<float>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<float>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672506765, "dur": 25, "args": {"External id": 7475, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7475, "registers per thread": 16, "shared memory": 0, "blocks per SM": 244.84848, "warps per SM": 979.3939, "grid": [32320, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 7475, "pid": 0, "tid": 7, "ts": 1742522672506765, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422851, "dur": 3, "args": {"External id": 7475, "cbid": 211, "correlation": 7475}}, {"ph": "s", "id": 7475, "pid": 494, "tid": 494, "ts": 1742522672422851, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<true, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<true, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506793, "dur": 20, "args": {"External id": 7481, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7481, "registers per thread": 32, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7481, "pid": 0, "tid": 7, "ts": 1742522672506793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422861, "dur": 3, "args": {"External id": 7481, "cbid": 211, "correlation": 7481}}, {"ph": "s", "id": 7481, "pid": 494, "tid": 494, "ts": 1742522672422861, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506815, "dur": 2, "args": {"External id": 7494, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7494, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7494, "pid": 0, "tid": 7, "ts": 1742522672506815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422883, "dur": 3, "args": {"External id": 7494, "cbid": 211, "correlation": 7494}}, {"ph": "s", "id": 7494, "pid": 494, "tid": 494, "ts": 1742522672422883, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506821, "dur": 2, "args": {"External id": 7504, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7504, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7504, "pid": 0, "tid": 7, "ts": 1742522672506821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422901, "dur": 3, "args": {"External id": 7504, "cbid": 211, "correlation": 7504}}, {"ph": "s", "id": 7504, "pid": 494, "tid": 494, "ts": 1742522672422901, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::compare_scalar_kernel<float>(at::TensorIteratorBase&, at::native::(anonymous namespace)::OpType, float)::{lambda(float)#1}, at::detail::Array<char*, 2> >(int, at::native::compare_scalar_kernel<float>(at::TensorIteratorBase&, at::native::(anonymous namespace)::OpType, float)::{lambda(float)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672506826, "dur": 1, "args": {"External id": 7514, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7514, "registers per thread": 18, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7514, "pid": 0, "tid": 7, "ts": 1742522672506826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422930, "dur": 4, "args": {"External id": 7514, "cbid": 211, "correlation": 7514}}, {"ph": "s", "id": 7514, "pid": 494, "tid": 494, "ts": 1742522672422930, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<float>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<float>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672506830, "dur": 1, "args": {"External id": 7524, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7524, "registers per thread": 26, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7524, "pid": 0, "tid": 7, "ts": 1742522672506830, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422953, "dur": 4, "args": {"External id": 7524, "cbid": 211, "correlation": 7524}}, {"ph": "s", "id": 7524, "pid": 494, "tid": 494, "ts": 1742522672422953, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::launch_clamp_scalar(at::TensorIteratorBase&, c10::Scalar, c10::Scalar, at::native::detail::ClampLimits)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2> >(int, at::native::(anonymous namespace)::launch_clamp_scalar(at::TensorIteratorBase&, c10::Scalar, c10::Scalar, at::native::detail::ClampLimits)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672506834, "dur": 1, "args": {"External id": 7534, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7534, "registers per thread": 22, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7534, "pid": 0, "tid": 7, "ts": 1742522672506834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422976, "dur": 5, "args": {"External id": 7534, "cbid": 211, "correlation": 7534}}, {"ph": "s", "id": 7534, "pid": 494, "tid": 494, "ts": 1742522672422976, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<float>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<float>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672506837, "dur": 1, "args": {"External id": 7547, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7547, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7547, "pid": 0, "tid": 7, "ts": 1742522672506837, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672422996, "dur": 2, "args": {"External id": 7547, "cbid": 211, "correlation": 7547}}, {"ph": "s", "id": 7547, "pid": 494, "tid": 494, "ts": 1742522672422996, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506841, "dur": 1, "args": {"External id": 7565, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7565, "registers per thread": 20, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7565, "pid": 0, "tid": 7, "ts": 1742522672506841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423020, "dur": 4, "args": {"External id": 7565, "cbid": 211, "correlation": 7565}}, {"ph": "s", "id": 7565, "pid": 494, "tid": 494, "ts": 1742522672423020, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::exp_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2> >(int, at::native::exp_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672506845, "dur": 1, "args": {"External id": 7575, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7575, "registers per thread": 19, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7575, "pid": 0, "tid": 7, "ts": 1742522672506845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423039, "dur": 4, "args": {"External id": 7575, "cbid": 211, "correlation": 7575}}, {"ph": "s", "id": 7575, "pid": 494, "tid": 494, "ts": 1742522672423039, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamIsCapturing", "pid": 494, "tid": 494, "ts": 1742522672423065, "dur": 1, "args": {"External id": 7593, "cbid": 317, "correlation": 7593}}, {"ph": "f", "id": 7593, "pid": 494, "tid": 494, "ts": 1742522672423065, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::uniform_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672506849, "dur": 1, "args": {"External id": 7595, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7595, "registers per thread": 52, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.060606062, "grid": [1, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7595, "pid": 0, "tid": 7, "ts": 1742522672506849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423069, "dur": 5, "args": {"External id": 7595, "cbid": 211, "correlation": 7595}}, {"ph": "s", "id": 7595, "pid": 494, "tid": 494, "ts": 1742522672423069, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::CompareFunctor<float>, at::detail::Array<char*, 3> >(int, at::native::(anonymous namespace)::CompareFunctor<float>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672506854, "dur": 1, "args": {"External id": 7605, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7605, "registers per thread": 24, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7605, "pid": 0, "tid": 7, "ts": 1742522672506854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423087, "dur": 3, "args": {"External id": 7605, "cbid": 211, "correlation": 7605}}, {"ph": "s", "id": 7605, "pid": 494, "tid": 494, "ts": 1742522672423087, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506858, "dur": 18, "args": {"External id": 7615, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7615, "registers per thread": 32, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7615, "pid": 0, "tid": 7, "ts": 1742522672506858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423104, "dur": 3, "args": {"External id": 7615, "cbid": 211, "correlation": 7615}}, {"ph": "s", "id": 7615, "pid": 494, "tid": 494, "ts": 1742522672423104, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::exp_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2> >(int, at::native::exp_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672506878, "dur": 2, "args": {"External id": 7625, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7625, "registers per thread": 19, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7625, "pid": 0, "tid": 7, "ts": 1742522672506878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423116, "dur": 2, "args": {"External id": 7625, "cbid": 211, "correlation": 7625}}, {"ph": "s", "id": 7625, "pid": 494, "tid": 494, "ts": 1742522672423116, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<float>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<float>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672506882, "dur": 2, "args": {"External id": 7635, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7635, "registers per thread": 26, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7635, "pid": 0, "tid": 7, "ts": 1742522672506882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423126, "dur": 2, "args": {"External id": 7635, "cbid": 211, "correlation": 7635}}, {"ph": "s", "id": 7635, "pid": 494, "tid": 494, "ts": 1742522672423126, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::launch_clamp_scalar(at::TensorIteratorBase&, c10::Scalar, c10::Scalar, at::native::detail::ClampLimits)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2> >(int, at::native::(anonymous namespace)::launch_clamp_scalar(at::TensorIteratorBase&, c10::Scalar, c10::Scalar, at::native::detail::ClampLimits)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672506888, "dur": 2, "args": {"External id": 7642, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7642, "registers per thread": 22, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7642, "pid": 0, "tid": 7, "ts": 1742522672506888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423143, "dur": 3, "args": {"External id": 7642, "cbid": 211, "correlation": 7642}}, {"ph": "s", "id": 7642, "pid": 494, "tid": 494, "ts": 1742522672423143, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": 7, "ts": 1742522672506893, "dur": 9, "args": {"External id": 7658, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7658, "registers per thread": 32, "shared memory": 16, "blocks per SM": 0.060606062, "warps per SM": 0.969697, "grid": [8, 1, 1], "block": [32, 16, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7658, "pid": 0, "tid": 7, "ts": 1742522672506893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423166, "dur": 5, "args": {"External id": 7658, "cbid": 211, "correlation": 7658}}, {"ph": "s", "id": 7658, "pid": 494, "tid": 494, "ts": 1742522672423166, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::BinaryFunctor<float, float, float, at::native::binary_internal::DivFunctor<float> > >(at::TensorIteratorBase&, at::native::BinaryFunctor<float, float, float, at::native::binary_internal::DivFunctor<float> > const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::BinaryFunctor<float, float, float, at::native::binary_internal::DivFunctor<float> > >(at::TensorIteratorBase&, at::native::BinaryFunctor<float, float, float, at::native::binary_internal::DivFunctor<float> > const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506905, "dur": 4, "args": {"External id": 7668, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7668, "registers per thread": 19, "shared memory": 0, "blocks per SM": 15.515152, "warps per SM": 62.060608, "grid": [2048, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 97}}, {"ph": "f", "id": 7668, "pid": 0, "tid": 7, "ts": 1742522672506905, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423187, "dur": 4, "args": {"External id": 7668, "cbid": 211, "correlation": 7668}}, {"ph": "s", "id": 7668, "pid": 494, "tid": 494, "ts": 1742522672423187, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamIsCapturing", "pid": 494, "tid": 494, "ts": 1742522672423211, "dur": 0, "args": {"External id": 7686, "cbid": 317, "correlation": 7686}}, {"ph": "f", "id": 7686, "pid": 494, "tid": 494, "ts": 1742522672423211, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672506912, "dur": 4, "args": {"External id": 7688, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7688, "registers per thread": 52, "shared memory": 0, "blocks per SM": 8, "warps per SM": 64, "grid": [1056, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 50}}, {"ph": "f", "id": 7688, "pid": 0, "tid": 7, "ts": 1742522672506912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423213, "dur": 5, "args": {"External id": 7688, "cbid": 211, "correlation": 7688}}, {"ph": "s", "id": 7688, "pid": 494, "tid": 494, "ts": 1742522672423213, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::BinaryFunctor<float, float, float, at::native::binary_internal::DivFunctor<float> >, at::detail::Array<char*, 3> >(int, at::native::BinaryFunctor<float, float, float, at::native::binary_internal::DivFunctor<float> >, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672506919, "dur": 3, "args": {"External id": 7694, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7694, "registers per thread": 26, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7694, "pid": 0, "tid": 7, "ts": 1742522672506919, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423226, "dur": 4, "args": {"External id": 7694, "cbid": 211, "correlation": 7694}}, {"ph": "s", "id": 7694, "pid": 494, "tid": 494, "ts": 1742522672423226, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<float, at::native::ArgMaxOps<float>, unsigned int, long, 4> >(at::native::ReduceOp<float, at::native::ArgMaxOps<float>, unsigned int, long, 4>)", "pid": 0, "tid": 7, "ts": 1742522672506925, "dur": 16, "args": {"External id": 7708, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7708, "registers per thread": 32, "shared memory": 16, "blocks per SM": 0.060606062, "warps per SM": 0.969697, "grid": [8, 1, 1], "block": [32, 16, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 7708, "pid": 0, "tid": 7, "ts": 1742522672506925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423247, "dur": 4, "args": {"External id": 7708, "cbid": 211, "correlation": 7708}}, {"ph": "s", "id": 7708, "pid": 494, "tid": 494, "ts": 1742522672423247, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506944, "dur": 2, "args": {"External id": 7718, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7718, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7718, "pid": 0, "tid": 7, "ts": 1742522672506944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423263, "dur": 4, "args": {"External id": 7718, "cbid": 211, "correlation": 7718}}, {"ph": "s", "id": 7718, "pid": 494, "tid": 494, "ts": 1742522672423263, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}, at::detail::Array<char*, 4> >(int, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}, at::detail::Array<char*, 4>)", "pid": 0, "tid": 7, "ts": 1742522672506950, "dur": 1, "args": {"External id": 7736, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7736, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7736, "pid": 0, "tid": 7, "ts": 1742522672506950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423290, "dur": 4, "args": {"External id": 7736, "cbid": 211, "correlation": 7736}}, {"ph": "s", "id": 7736, "pid": 494, "tid": 494, "ts": 1742522672423290, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672506954, "dur": 2, "args": {"External id": 7749, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7749, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7749, "pid": 0, "tid": 7, "ts": 1742522672506954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423309, "dur": 3, "args": {"External id": 7749, "cbid": 211, "correlation": 7749}}, {"ph": "s", "id": 7749, "pid": 494, "tid": 494, "ts": 1742522672423309, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::sbtopk::gatherTopK<float, unsigned int, 2, false>(at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, unsigned int, bool, unsigned int, unsigned int, at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, at::cuda::detail::TensorInfo<long, unsigned int>, unsigned int, float*)", "pid": 0, "tid": 7, "ts": 1742522672506958, "dur": 33, "args": {"External id": 7764, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7764, "registers per thread": 47, "shared memory": 128, "blocks per SM": 0.969697, "warps per SM": 31.030304, "grid": [128, 1, 1], "block": [1024, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 7764, "pid": 0, "tid": 7, "ts": 1742522672506958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423336, "dur": 5, "args": {"External id": 7764, "cbid": 211, "correlation": 7764}}, {"ph": "s", "id": 7764, "pid": 494, "tid": 494, "ts": 1742522672423336, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceGetAttribute", "pid": 494, "tid": 494, "ts": 1742522672423347, "dur": 2, "args": {"External id": 7767, "cbid": 200, "correlation": 7767}}, {"ph": "f", "id": 7767, "pid": 494, "tid": 494, "ts": 1742522672423347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceGetAttribute", "pid": 494, "tid": 494, "ts": 1742522672423349, "dur": 0, "args": {"External id": 7768, "cbid": 200, "correlation": 7768}}, {"ph": "f", "id": 7768, "pid": 494, "tid": 494, "ts": 1742522672423349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceGetAttribute", "pid": 494, "tid": 494, "ts": 1742522672423350, "dur": 0, "args": {"External id": 7769, "cbid": 200, "correlation": 7769}}, {"ph": "f", "id": 7769, "pid": 494, "tid": 494, "ts": 1742522672423350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceGetAttribute", "pid": 494, "tid": 494, "ts": 1742522672423350, "dur": 0, "args": {"External id": 7770, "cbid": 200, "correlation": 7770}}, {"ph": "f", "id": 7770, "pid": 494, "tid": 494, "ts": 1742522672423350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncGetAttributes", "pid": 494, "tid": 494, "ts": 1742522672423351, "dur": 3, "args": {"External id": 7771, "cbid": 15, "correlation": 7771}}, {"ph": "f", "id": 7771, "pid": 494, "tid": 494, "ts": 1742522672423351, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423356, "dur": 2, "args": {"External id": 7772, "cbid": 251, "correlation": 7772}}, {"ph": "f", "id": 7772, "pid": 494, "tid": 494, "ts": 1742522672423356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423359, "dur": 0, "args": {"External id": 7773, "cbid": 251, "correlation": 7773}}, {"ph": "f", "id": 7773, "pid": 494, "tid": 494, "ts": 1742522672423359, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423360, "dur": 0, "args": {"External id": 7774, "cbid": 251, "correlation": 7774}}, {"ph": "f", "id": 7774, "pid": 494, "tid": 494, "ts": 1742522672423360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423360, "dur": 0, "args": {"External id": 7775, "cbid": 251, "correlation": 7775}}, {"ph": "f", "id": 7775, "pid": 494, "tid": 494, "ts": 1742522672423360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423361, "dur": 0, "args": {"External id": 7776, "cbid": 251, "correlation": 7776}}, {"ph": "f", "id": 7776, "pid": 494, "tid": 494, "ts": 1742522672423361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423361, "dur": 0, "args": {"External id": 7777, "cbid": 251, "correlation": 7777}}, {"ph": "f", "id": 7777, "pid": 494, "tid": 494, "ts": 1742522672423361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423362, "dur": 0, "args": {"External id": 7778, "cbid": 251, "correlation": 7778}}, {"ph": "f", "id": 7778, "pid": 494, "tid": 494, "ts": 1742522672423362, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423362, "dur": 0, "args": {"External id": 7779, "cbid": 251, "correlation": 7779}}, {"ph": "f", "id": 7779, "pid": 494, "tid": 494, "ts": 1742522672423362, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::bitonicSortKVInPlace<2, -1, 16, 16, float, long, at::native::GTOp<float, true>, unsigned int>(at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, unsigned int, unsigned int, at::cuda::detail::TensorInfo<long, unsigned int>, unsigned int, at::native::GTOp<float, true>)", "pid": 0, "tid": 7, "ts": 1742522672506995, "dur": 5, "args": {"External id": 7781, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7781, "registers per thread": 38, "shared memory": 6656, "blocks per SM": 0.969697, "warps per SM": 0.4848485, "grid": [128, 1, 1], "block": [16, 1, 1], "est. achieved occupancy %": 1}}, {"ph": "f", "id": 7781, "pid": 0, "tid": 7, "ts": 1742522672506995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423364, "dur": 3, "args": {"External id": 7781, "cbid": 211, "correlation": 7781}}, {"ph": "s", "id": 7781, "pid": 494, "tid": 494, "ts": 1742522672423364, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507002, "dur": 5, "args": {"External id": 7791, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7791, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.015151516, "warps per SM": 0.060606062, "grid": [2, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7791, "pid": 0, "tid": 7, "ts": 1742522672507002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423379, "dur": 3, "args": {"External id": 7791, "cbid": 211, "correlation": 7791}}, {"ph": "s", "id": 7791, "pid": 494, "tid": 494, "ts": 1742522672423379, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<long>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<long>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672507009, "dur": 1, "args": {"External id": 7804, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7804, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7804, "pid": 0, "tid": 7, "ts": 1742522672507009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423396, "dur": 3, "args": {"External id": 7804, "cbid": 211, "correlation": 7804}}, {"ph": "s", "id": 7804, "pid": 494, "tid": 494, "ts": 1742522672423396, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507012, "dur": 1, "args": {"External id": 7822, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7822, "registers per thread": 20, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7822, "pid": 0, "tid": 7, "ts": 1742522672507012, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423411, "dur": 3, "args": {"External id": 7822, "cbid": 211, "correlation": 7822}}, {"ph": "s", "id": 7822, "pid": 494, "tid": 494, "ts": 1742522672423411, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507017, "dur": 1, "args": {"External id": 7834, "device": 0, "context": 1, "stream": 7, "correlation": 7834, "bytes": 1024, "memory bandwidth (GB/s)": 0.7447272727272727}}, {"ph": "f", "id": 7834, "pid": 0, "tid": 7, "ts": 1742522672507017, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672423429, "dur": 8, "args": {"External id": 7834, "cbid": 41, "correlation": 7834}}, {"ph": "s", "id": 7834, "pid": 494, "tid": 494, "ts": 1742522672423429, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507020, "dur": 1, "args": {"External id": 7846, "device": 0, "context": 1, "stream": 7, "correlation": 7846, "bytes": 512, "memory bandwidth (GB/s)": 0.36363636363636365}}, {"ph": "f", "id": 7846, "pid": 0, "tid": 7, "ts": 1742522672507020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672423449, "dur": 3, "args": {"External id": 7846, "cbid": 41, "correlation": 7846}}, {"ph": "s", "id": 7846, "pid": 494, "tid": 494, "ts": 1742522672423449, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::bitwise_not_kernel_cuda(at::TensorIteratorBase&)::{lambda(bool)#1}, at::detail::Array<char*, 2> >(int, at::native::bitwise_not_kernel_cuda(at::TensorIteratorBase&)::{lambda(bool)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507024, "dur": 1, "args": {"External id": 7857, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7857, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7857, "pid": 0, "tid": 7, "ts": 1742522672507024, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423468, "dur": 3, "args": {"External id": 7857, "cbid": 211, "correlation": 7857}}, {"ph": "s", "id": 7857, "pid": 494, "tid": 494, "ts": 1742522672423468, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<long>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<long>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672507027, "dur": 1, "args": {"External id": 7872, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7872, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7872, "pid": 0, "tid": 7, "ts": 1742522672507027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423483, "dur": 2, "args": {"External id": 7872, "cbid": 211, "correlation": 7872}}, {"ph": "s", "id": 7872, "pid": 494, "tid": 494, "ts": 1742522672423483, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<long>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<long>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672507030, "dur": 1, "args": {"External id": 7885, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7885, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7885, "pid": 0, "tid": 7, "ts": 1742522672507030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423493, "dur": 2, "args": {"External id": 7885, "cbid": 211, "correlation": 7885}}, {"ph": "s", "id": 7885, "pid": 494, "tid": 494, "ts": 1742522672423493, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507034, "dur": 1, "args": {"External id": 7903, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7903, "registers per thread": 20, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7903, "pid": 0, "tid": 7, "ts": 1742522672507034, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423505, "dur": 3, "args": {"External id": 7903, "cbid": 211, "correlation": 7903}}, {"ph": "s", "id": 7903, "pid": 494, "tid": 494, "ts": 1742522672423505, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::bitwise_not_kernel_cuda(at::TensorIteratorBase&)::{lambda(bool)#1}, at::detail::Array<char*, 2> >(int, at::native::bitwise_not_kernel_cuda(at::TensorIteratorBase&)::{lambda(bool)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507038, "dur": 1, "args": {"External id": 7914, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7914, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7914, "pid": 0, "tid": 7, "ts": 1742522672507038, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423515, "dur": 2, "args": {"External id": 7914, "cbid": 211, "correlation": 7914}}, {"ph": "s", "id": 7914, "pid": 494, "tid": 494, "ts": 1742522672423515, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1> >(int, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672507041, "dur": 3, "args": {"External id": 7940, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7940, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.015151516, "warps per SM": 0.060606062, "grid": [2, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7940, "pid": 0, "tid": 7, "ts": 1742522672507041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423539, "dur": 4, "args": {"External id": 7940, "cbid": 211, "correlation": 7940}}, {"ph": "s", "id": 7940, "pid": 494, "tid": 494, "ts": 1742522672423539, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507047, "dur": 2, "args": {"External id": 7951, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7951, "registers per thread": 20, "shared memory": 0, "blocks per SM": 0.022727273, "warps per SM": 0.09090909, "grid": [3, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7951, "pid": 0, "tid": 7, "ts": 1742522672507047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423550, "dur": 3, "args": {"External id": 7951, "cbid": 211, "correlation": 7951}}, {"ph": "s", "id": 7951, "pid": 494, "tid": 494, "ts": 1742522672423550, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::bitwise_not_kernel_cuda(at::TensorIteratorBase&)::{lambda(bool)#1}, at::detail::Array<char*, 2> >(int, at::native::bitwise_not_kernel_cuda(at::TensorIteratorBase&)::{lambda(bool)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507051, "dur": 1, "args": {"External id": 7962, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7962, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7962, "pid": 0, "tid": 7, "ts": 1742522672507051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423561, "dur": 2, "args": {"External id": 7962, "cbid": 211, "correlation": 7962}}, {"ph": "s", "id": 7962, "pid": 494, "tid": 494, "ts": 1742522672423561, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507054, "dur": 2, "args": {"External id": 7985, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 7985, "registers per thread": 20, "shared memory": 0, "blocks per SM": 0.022727273, "warps per SM": 0.09090909, "grid": [3, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 7985, "pid": 0, "tid": 7, "ts": 1742522672507054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423578, "dur": 3, "args": {"External id": 7985, "cbid": 211, "correlation": 7985}}, {"ph": "s", "id": 7985, "pid": 494, "tid": 494, "ts": 1742522672423578, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1> >(int, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672507060, "dur": 1, "args": {"External id": 8003, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8003, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8003, "pid": 0, "tid": 7, "ts": 1742522672507060, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423597, "dur": 3, "args": {"External id": 8003, "cbid": 211, "correlation": 8003}}, {"ph": "s", "id": 8003, "pid": 494, "tid": 494, "ts": 1742522672423597, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<long, at::native::func_wrapper_t<long, at::native::sum_functor<long, long, long>::operator()(at::TensorIterator&)::{lambda(long, long)#1}>, unsigned int, long, 4> >(at::native::ReduceOp<long, at::native::func_wrapper_t<long, at::native::sum_functor<long, long, long>::operator()(at::TensorIterator&)::{lambda(long, long)#1}>, unsigned int, long, 4>)", "pid": 0, "tid": 7, "ts": 1742522672507063, "dur": 3, "args": {"External id": 8013, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8013, "registers per thread": 32, "shared memory": 1040, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8013, "pid": 0, "tid": 7, "ts": 1742522672507063, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423607, "dur": 4, "args": {"External id": 8013, "cbid": 211, "correlation": 8013}}, {"ph": "s", "id": 8013, "pid": 494, "tid": 494, "ts": 1742522672423607, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507068, "dur": 1, "args": {"External id": 8023, "device": 0, "context": 1, "stream": 7, "correlation": 8023, "bytes": 1024, "memory bandwidth (GB/s)": 0.7111111111111111}}, {"ph": "f", "id": 8023, "pid": 0, "tid": 7, "ts": 1742522672507068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672423636, "dur": 6, "args": {"External id": 8023, "cbid": 41, "correlation": 8023}}, {"ph": "s", "id": 8023, "pid": 494, "tid": 494, "ts": 1742522672423636, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507073, "dur": 1, "args": {"External id": 8035, "device": 0, "context": 1, "stream": 7, "correlation": 8035, "bytes": 512, "memory bandwidth (GB/s)": 0.42105263157894735}}, {"ph": "f", "id": 8035, "pid": 0, "tid": 7, "ts": 1742522672507073, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672423653, "dur": 3, "args": {"External id": 8035, "cbid": 41, "correlation": 8035}}, {"ph": "s", "id": 8035, "pid": 494, "tid": 494, "ts": 1742522672423653, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<long>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<long>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672507076, "dur": 1, "args": {"External id": 8050, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8050, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8050, "pid": 0, "tid": 7, "ts": 1742522672507076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423668, "dur": 3, "args": {"External id": 8050, "cbid": 211, "correlation": 8050}}, {"ph": "s", "id": 8050, "pid": 494, "tid": 494, "ts": 1742522672423668, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507079, "dur": 1, "args": {"External id": 8068, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8068, "registers per thread": 20, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8068, "pid": 0, "tid": 7, "ts": 1742522672507079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423693, "dur": 3, "args": {"External id": 8068, "cbid": 211, "correlation": 8068}}, {"ph": "s", "id": 8068, "pid": 494, "tid": 494, "ts": 1742522672423693, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void vllm::apply_penalty_kernel<float, true, 256>(float*, float const*, float const*, float const*, long const*, int**, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672507084, "dur": 69, "args": {"External id": 8074, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8074, "registers per thread": 168, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 8074, "pid": 0, "tid": 7, "ts": 1742522672507084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423702, "dur": 3, "args": {"External id": 8074, "cbid": 211, "correlation": 8074}}, {"ph": "s", "id": 8074, "pid": 494, "tid": 494, "ts": 1742522672423702, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672423718, "dur": 1, "args": {"External id": 8093, "cbid": 273, "correlation": 8093}}, {"ph": "f", "id": 8093, "pid": 494, "tid": 494, "ts": 1742522672423718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::topk_kernel<float, unsigned int, 2>(at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, unsigned int, bool, bool, unsigned int, at::cuda::detail::TensorInfo<float, unsigned int>, at::cuda::detail::TensorInfo<long, unsigned int>)", "pid": 0, "tid": 7, "ts": 1742522672507155, "dur": 154, "args": {"External id": 8094, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8094, "registers per thread": 64, "shared memory": 113216, "blocks per SM": 0.969697, "warps per SM": 31.030304, "grid": [128, 1, 1], "block": [1024, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8094, "pid": 0, "tid": 7, "ts": 1742522672507155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423720, "dur": 3, "args": {"External id": 8094, "cbid": 211, "correlation": 8094}}, {"ph": "s", "id": 8094, "pid": 494, "tid": 494, "ts": 1742522672423720, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507312, "dur": 2, "args": {"External id": 8105, "device": 0, "context": 1, "stream": 7, "correlation": 8105, "bytes": 2097152, "memory bandwidth (GB/s)": 728.1777777777778}}, {"ph": "f", "id": 8105, "pid": 0, "tid": 7, "ts": 1742522672507312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672423734, "dur": 8, "args": {"External id": 8105, "cbid": 41, "correlation": 8105}}, {"ph": "s", "id": 8105, "pid": 494, "tid": 494, "ts": 1742522672423734, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672507318, "dur": 1, "args": {"External id": 8123, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8123, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 0.969697, "grid": [64, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 8123, "pid": 0, "tid": 7, "ts": 1742522672507318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423753, "dur": 3, "args": {"External id": 8123, "cbid": 211, "correlation": 8123}}, {"ph": "s", "id": 8123, "pid": 494, "tid": 494, "ts": 1742522672423753, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507322, "dur": 3, "args": {"External id": 8129, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8129, "registers per thread": 18, "shared memory": 0, "blocks per SM": 15.515152, "warps per SM": 62.060608, "grid": [2048, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 97}}, {"ph": "f", "id": 8129, "pid": 0, "tid": 7, "ts": 1742522672507322, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423763, "dur": 3, "args": {"External id": 8129, "cbid": 211, "correlation": 8129}}, {"ph": "s", "id": 8129, "pid": 494, "tid": 494, "ts": 1742522672423763, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507328, "dur": 2, "args": {"External id": 8135, "device": 0, "context": 1, "stream": 7, "correlation": 8135, "bytes": 2097152, "memory bandwidth (GB/s)": 910.2222222222222}}, {"ph": "f", "id": 8135, "pid": 0, "tid": 7, "ts": 1742522672507328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672423770, "dur": 3, "args": {"External id": 8135, "cbid": 41, "correlation": 8135}}, {"ph": "s", "id": 8135, "pid": 494, "tid": 494, "ts": 1742522672423770, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::radixSortKVInPlace<2, -1, 128, 32, float, long, unsigned int>(at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, unsigned int, unsigned int, at::cuda::detail::TensorInfo<long, unsigned int>, unsigned int, bool)", "pid": 0, "tid": 7, "ts": 1742522672507333, "dur": 24, "args": {"External id": 8140, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8140, "registers per thread": 217, "shared memory": 33808, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 8140, "pid": 0, "tid": 7, "ts": 1742522672507333, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423777, "dur": 3, "args": {"External id": 8140, "cbid": 211, "correlation": 8140}}, {"ph": "s", "id": 8140, "pid": 494, "tid": 494, "ts": 1742522672423777, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void vllm::mask_top_p_kernel<4, float, float, 256>(float*, float const*, int, int)", "pid": 0, "tid": 7, "ts": 1742522672507360, "dur": 8, "args": {"External id": 8146, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8146, "registers per thread": 32, "shared memory": 1216, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 8146, "pid": 0, "tid": 7, "ts": 1742522672507360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423786, "dur": 3, "args": {"External id": 8146, "cbid": 211, "correlation": 8146}}, {"ph": "s", "id": 8146, "pid": 494, "tid": 494, "ts": 1742522672423786, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672507370, "dur": 1, "args": {"External id": 8164, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8164, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 0.969697, "grid": [64, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 8164, "pid": 0, "tid": 7, "ts": 1742522672507370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423802, "dur": 2, "args": {"External id": 8164, "cbid": 211, "correlation": 8164}}, {"ph": "s", "id": 8164, "pid": 494, "tid": 494, "ts": 1742522672423802, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<true, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<true, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507374, "dur": 10, "args": {"External id": 8177, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8177, "registers per thread": 32, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 8177, "pid": 0, "tid": 7, "ts": 1742522672507374, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423825, "dur": 3, "args": {"External id": 8177, "cbid": 211, "correlation": 8177}}, {"ph": "s", "id": 8177, "pid": 494, "tid": 494, "ts": 1742522672423825, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507388, "dur": 5, "args": {"External id": 8187, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8187, "registers per thread": 32, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 8187, "pid": 0, "tid": 7, "ts": 1742522672507388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423839, "dur": 2, "args": {"External id": 8187, "cbid": 211, "correlation": 8187}}, {"ph": "s", "id": 8187, "pid": 494, "tid": 494, "ts": 1742522672423839, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::cunn_SoftMaxForward<4, float, float, float, at::native::(anonymous namespace)::SoftMaxForwardEpilogue>(float*, float const*, int)", "pid": 0, "tid": 7, "ts": 1742522672507396, "dur": 5, "args": {"External id": 8198, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8198, "registers per thread": 30, "shared memory": 2048, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 8198, "pid": 0, "tid": 7, "ts": 1742522672507396, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423854, "dur": 3, "args": {"External id": 8198, "cbid": 211, "correlation": 8198}}, {"ph": "s", "id": 8198, "pid": 494, "tid": 494, "ts": 1742522672423854, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::log_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2> >(int, at::native::log_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507403, "dur": 2, "args": {"External id": 8208, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8208, "registers per thread": 22, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 8208, "pid": 0, "tid": 7, "ts": 1742522672507403, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423872, "dur": 3, "args": {"External id": 8208, "cbid": 211, "correlation": 8208}}, {"ph": "s", "id": 8208, "pid": 494, "tid": 494, "ts": 1742522672423872, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamIsCapturing", "pid": 494, "tid": 494, "ts": 1742522672423887, "dur": 0, "args": {"External id": 8226, "cbid": 317, "correlation": 8226}}, {"ph": "f", "id": 8226, "pid": 494, "tid": 494, "ts": 1742522672423887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672507407, "dur": 4, "args": {"External id": 8228, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8228, "registers per thread": 52, "shared memory": 0, "blocks per SM": 8, "warps per SM": 64, "grid": [1056, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 50}}, {"ph": "f", "id": 8228, "pid": 0, "tid": 7, "ts": 1742522672507407, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423888, "dur": 4, "args": {"External id": 8228, "cbid": 211, "correlation": 8228}}, {"ph": "s", "id": 8228, "pid": 494, "tid": 494, "ts": 1742522672423888, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::BinaryFunctor<float, float, float, at::native::binary_internal::DivFunctor<float> >, at::detail::Array<char*, 3> >(int, at::native::BinaryFunctor<float, float, float, at::native::binary_internal::DivFunctor<float> >, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672507415, "dur": 3, "args": {"External id": 8234, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8234, "registers per thread": 26, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 8234, "pid": 0, "tid": 7, "ts": 1742522672507415, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423902, "dur": 3, "args": {"External id": 8234, "cbid": 211, "correlation": 8234}}, {"ph": "s", "id": 8234, "pid": 494, "tid": 494, "ts": 1742522672423902, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<float, at::native::ArgMaxOps<float>, unsigned int, long, 4> >(at::native::ReduceOp<float, at::native::ArgMaxOps<float>, unsigned int, long, 4>)", "pid": 0, "tid": 7, "ts": 1742522672507421, "dur": 16, "args": {"External id": 8248, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8248, "registers per thread": 32, "shared memory": 16, "blocks per SM": 0.060606062, "warps per SM": 0.969697, "grid": [8, 1, 1], "block": [32, 16, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 8248, "pid": 0, "tid": 7, "ts": 1742522672507421, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423917, "dur": 3, "args": {"External id": 8248, "cbid": 211, "correlation": 8248}}, {"ph": "s", "id": 8248, "pid": 494, "tid": 494, "ts": 1742522672423917, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507440, "dur": 2, "args": {"External id": 8261, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8261, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8261, "pid": 0, "tid": 7, "ts": 1742522672507440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423937, "dur": 3, "args": {"External id": 8261, "cbid": 211, "correlation": 8261}}, {"ph": "s", "id": 8261, "pid": 494, "tid": 494, "ts": 1742522672423937, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::sbtopk::gatherTopK<float, unsigned int, 2, false>(at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, unsigned int, bool, unsigned int, unsigned int, at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, at::cuda::detail::TensorInfo<long, unsigned int>, unsigned int, float*)", "pid": 0, "tid": 7, "ts": 1742522672507444, "dur": 32, "args": {"External id": 8276, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8276, "registers per thread": 47, "shared memory": 128, "blocks per SM": 0.969697, "warps per SM": 31.030304, "grid": [128, 1, 1], "block": [1024, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 8276, "pid": 0, "tid": 7, "ts": 1742522672507444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423955, "dur": 3, "args": {"External id": 8276, "cbid": 211, "correlation": 8276}}, {"ph": "s", "id": 8276, "pid": 494, "tid": 494, "ts": 1742522672423955, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceGetAttribute", "pid": 494, "tid": 494, "ts": 1742522672423960, "dur": 0, "args": {"External id": 8279, "cbid": 200, "correlation": 8279}}, {"ph": "f", "id": 8279, "pid": 494, "tid": 494, "ts": 1742522672423960, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceGetAttribute", "pid": 494, "tid": 494, "ts": 1742522672423961, "dur": 0, "args": {"External id": 8280, "cbid": 200, "correlation": 8280}}, {"ph": "f", "id": 8280, "pid": 494, "tid": 494, "ts": 1742522672423961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceGetAttribute", "pid": 494, "tid": 494, "ts": 1742522672423961, "dur": 0, "args": {"External id": 8281, "cbid": 200, "correlation": 8281}}, {"ph": "f", "id": 8281, "pid": 494, "tid": 494, "ts": 1742522672423961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaDeviceGetAttribute", "pid": 494, "tid": 494, "ts": 1742522672423961, "dur": 0, "args": {"External id": 8282, "cbid": 200, "correlation": 8282}}, {"ph": "f", "id": 8282, "pid": 494, "tid": 494, "ts": 1742522672423961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncGetAttributes", "pid": 494, "tid": 494, "ts": 1742522672423962, "dur": 3, "args": {"External id": 8283, "cbid": 15, "correlation": 8283}}, {"ph": "f", "id": 8283, "pid": 494, "tid": 494, "ts": 1742522672423962, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423965, "dur": 0, "args": {"External id": 8284, "cbid": 251, "correlation": 8284}}, {"ph": "f", "id": 8284, "pid": 494, "tid": 494, "ts": 1742522672423965, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423966, "dur": 0, "args": {"External id": 8285, "cbid": 251, "correlation": 8285}}, {"ph": "f", "id": 8285, "pid": 494, "tid": 494, "ts": 1742522672423966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423967, "dur": 0, "args": {"External id": 8286, "cbid": 251, "correlation": 8286}}, {"ph": "f", "id": 8286, "pid": 494, "tid": 494, "ts": 1742522672423967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423967, "dur": 0, "args": {"External id": 8287, "cbid": 251, "correlation": 8287}}, {"ph": "f", "id": 8287, "pid": 494, "tid": 494, "ts": 1742522672423967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423968, "dur": 0, "args": {"External id": 8288, "cbid": 251, "correlation": 8288}}, {"ph": "f", "id": 8288, "pid": 494, "tid": 494, "ts": 1742522672423968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423968, "dur": 0, "args": {"External id": 8289, "cbid": 251, "correlation": 8289}}, {"ph": "f", "id": 8289, "pid": 494, "tid": 494, "ts": 1742522672423968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423969, "dur": 0, "args": {"External id": 8290, "cbid": 251, "correlation": 8290}}, {"ph": "f", "id": 8290, "pid": 494, "tid": 494, "ts": 1742522672423969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672423969, "dur": 0, "args": {"External id": 8291, "cbid": 251, "correlation": 8291}}, {"ph": "f", "id": 8291, "pid": 494, "tid": 494, "ts": 1742522672423969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::bitonicSortKVInPlace<2, -1, 16, 16, float, long, at::native::GTOp<float, true>, unsigned int>(at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, unsigned int, unsigned int, at::cuda::detail::TensorInfo<long, unsigned int>, unsigned int, at::native::GTOp<float, true>)", "pid": 0, "tid": 7, "ts": 1742522672507479, "dur": 5, "args": {"External id": 8293, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8293, "registers per thread": 38, "shared memory": 6656, "blocks per SM": 0.969697, "warps per SM": 0.4848485, "grid": [128, 1, 1], "block": [16, 1, 1], "est. achieved occupancy %": 1}}, {"ph": "f", "id": 8293, "pid": 0, "tid": 7, "ts": 1742522672507479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423971, "dur": 3, "args": {"External id": 8293, "cbid": 211, "correlation": 8293}}, {"ph": "s", "id": 8293, "pid": 494, "tid": 494, "ts": 1742522672423971, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507487, "dur": 2, "args": {"External id": 8303, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8303, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8303, "pid": 0, "tid": 7, "ts": 1742522672507487, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423985, "dur": 3, "args": {"External id": 8303, "cbid": 211, "correlation": 8303}}, {"ph": "s", "id": 8303, "pid": 494, "tid": 494, "ts": 1742522672423985, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507491, "dur": 3, "args": {"External id": 8313, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8313, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.015151516, "warps per SM": 0.060606062, "grid": [2, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8313, "pid": 0, "tid": 7, "ts": 1742522672507491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672423998, "dur": 2, "args": {"External id": 8313, "cbid": 211, "correlation": 8313}}, {"ph": "s", "id": 8313, "pid": 494, "tid": 494, "ts": 1742522672423998, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507498, "dur": 2, "args": {"External id": 8334, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8334, "registers per thread": 20, "shared memory": 0, "blocks per SM": 0.022727273, "warps per SM": 0.09090909, "grid": [3, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8334, "pid": 0, "tid": 7, "ts": 1742522672507498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424019, "dur": 3, "args": {"External id": 8334, "cbid": 211, "correlation": 8334}}, {"ph": "s", "id": 8334, "pid": 494, "tid": 494, "ts": 1742522672424019, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1}>(at::TensorIteratorBase&, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507503, "dur": 2, "args": {"External id": 8355, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8355, "registers per thread": 20, "shared memory": 0, "blocks per SM": 0.022727273, "warps per SM": 0.09090909, "grid": [3, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8355, "pid": 0, "tid": 7, "ts": 1742522672507503, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424037, "dur": 3, "args": {"External id": 8355, "cbid": 211, "correlation": 8355}}, {"ph": "s", "id": 8355, "pid": 494, "tid": 494, "ts": 1742522672424037, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}, at::detail::Array<char*, 4> >(int, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(bool, long, long)#1}, at::detail::Array<char*, 4>)", "pid": 0, "tid": 7, "ts": 1742522672507507, "dur": 1, "args": {"External id": 8373, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8373, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8373, "pid": 0, "tid": 7, "ts": 1742522672507507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424051, "dur": 3, "args": {"External id": 8373, "cbid": 211, "correlation": 8373}}, {"ph": "s", "id": 8373, "pid": 494, "tid": 494, "ts": 1742522672424051, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1}, at::detail::Array<char*, 4> >(int, at::native::(anonymous namespace)::where_kernel_impl(at::TensorIterator&)::{lambda()#1}::operator()() const::{lambda()#7}::operator()() const::{lambda(bool, float, float)#1}, at::detail::Array<char*, 4>)", "pid": 0, "tid": 7, "ts": 1742522672507512, "dur": 1, "args": {"External id": 8391, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8391, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8391, "pid": 0, "tid": 7, "ts": 1742522672507512, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424064, "dur": 4, "args": {"External id": 8391, "cbid": 211, "correlation": 8391}}, {"ph": "s", "id": 8391, "pid": 494, "tid": 494, "ts": 1742522672424064, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::CatArrayBatchedCopy_aligned16_contig<long, unsigned int, 2, 128, 1>(long*, at::native::(anonymous namespace)::CatArrInputTensorMetadata<long, unsigned int, 128, 1>, at::native::(anonymous namespace)::TensorSizeStride<unsigned int, 4u>, int, unsigned int)", "pid": 0, "tid": 7, "ts": 1742522672507515, "dur": 2, "args": {"External id": 8402, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8402, "registers per thread": 26, "shared memory": 0, "blocks per SM": 0.015151516, "warps per SM": 0.060606062, "grid": [1, 2, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8402, "pid": 0, "tid": 7, "ts": 1742522672507515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424081, "dur": 3, "args": {"External id": 8402, "cbid": 211, "correlation": 8402}}, {"ph": "s", "id": 8402, "pid": 494, "tid": 494, "ts": 1742522672424081, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<int>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<int>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672507520, "dur": 1, "args": {"External id": 8422, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8422, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8422, "pid": 0, "tid": 7, "ts": 1742522672507520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424181, "dur": 4, "args": {"External id": 8422, "cbid": 211, "correlation": 8422}}, {"ph": "s", "id": 8422, "pid": 494, "tid": 494, "ts": 1742522672424181, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<int>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<int>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672507523, "dur": 1, "args": {"External id": 8435, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8435, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8435, "pid": 0, "tid": 7, "ts": 1742522672507523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424195, "dur": 2, "args": {"External id": 8435, "cbid": 211, "correlation": 8435}}, {"ph": "s", "id": 8435, "pid": 494, "tid": 494, "ts": 1742522672424195, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1> >(int, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672507527, "dur": 1, "args": {"External id": 8442, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8442, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8442, "pid": 0, "tid": 7, "ts": 1742522672507527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424208, "dur": 5, "args": {"External id": 8442, "cbid": 211, "correlation": 8442}}, {"ph": "s", "id": 8442, "pid": 494, "tid": 494, "ts": 1742522672424208, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<int>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<int>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672507531, "dur": 1, "args": {"External id": 8455, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8455, "registers per thread": 26, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8455, "pid": 0, "tid": 7, "ts": 1742522672507531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424230, "dur": 3, "args": {"External id": 8455, "cbid": 211, "correlation": 8455}}, {"ph": "s", "id": 8455, "pid": 494, "tid": 494, "ts": 1742522672424230, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctorOnSelf_add<int>, at::detail::Array<char*, 2> >(int, at::native::CUDAFunctorOnSelf_add<int>, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507535, "dur": 1, "args": {"External id": 8465, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8465, "registers per thread": 22, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8465, "pid": 0, "tid": 7, "ts": 1742522672507535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424249, "dur": 4, "args": {"External id": 8465, "cbid": 211, "correlation": 8465}}, {"ph": "s", "id": 8465, "pid": 494, "tid": 494, "ts": 1742522672424249, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctorOnSelf_add<int>, at::detail::Array<char*, 2> >(int, at::native::CUDAFunctorOnSelf_add<int>, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507538, "dur": 1, "args": {"External id": 8475, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8475, "registers per thread": 22, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8475, "pid": 0, "tid": 7, "ts": 1742522672507538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424261, "dur": 2, "args": {"External id": 8475, "cbid": 211, "correlation": 8475}}, {"ph": "s", "id": 8475, "pid": 494, "tid": 494, "ts": 1742522672424261, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::(anonymous namespace)::clamp_kernel_impl(at::TensorIteratorBase&)::{lambda()#1}::operator()() const::{lambda()#3}::operator()() const::{lambda(int, int, int)#1}, at::detail::Array<char*, 4> >(int, at::native::(anonymous namespace)::clamp_kernel_impl(at::TensorIteratorBase&)::{lambda()#1}::operator()() const::{lambda()#3}::operator()() const::{lambda(int, int, int)#1}, at::detail::Array<char*, 4>)", "pid": 0, "tid": 7, "ts": 1742522672507542, "dur": 1, "args": {"External id": 8485, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8485, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8485, "pid": 0, "tid": 7, "ts": 1742522672507542, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424276, "dur": 4, "args": {"External id": 8485, "cbid": 211, "correlation": 8485}}, {"ph": "s", "id": 8485, "pid": 494, "tid": 494, "ts": 1742522672424276, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<int>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<int>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672507546, "dur": 1, "args": {"External id": 8495, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8495, "registers per thread": 26, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8495, "pid": 0, "tid": 7, "ts": 1742522672507546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424287, "dur": 2, "args": {"External id": 8495, "cbid": 211, "correlation": 8495}}, {"ph": "s", "id": 8495, "pid": 494, "tid": 494, "ts": 1742522672424287, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507550, "dur": 1, "args": {"External id": 8508, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8508, "registers per thread": 18, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8508, "pid": 0, "tid": 7, "ts": 1742522672507550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424313, "dur": 3, "args": {"External id": 8508, "cbid": 211, "correlation": 8508}}, {"ph": "s", "id": 8508, "pid": 494, "tid": 494, "ts": 1742522672424313, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>, TrivialOffsetCalculator<2, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<2>, at::native::memory::StoreWithCast<1> >(int, at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>, TrivialOffsetCalculator<2, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<2>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672507554, "dur": 2, "args": {"External id": 8519, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8519, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8519, "pid": 0, "tid": 7, "ts": 1742522672507554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424327, "dur": 4, "args": {"External id": 8519, "cbid": 211, "correlation": 8519}}, {"ph": "s", "id": 8519, "pid": 494, "tid": 494, "ts": 1742522672424327, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>, TrivialOffsetCalculator<2, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<2>, at::native::memory::StoreWithCast<1> >(int, at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>, TrivialOffsetCalculator<2, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<2>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672507560, "dur": 1, "args": {"External id": 8532, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8532, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8532, "pid": 0, "tid": 7, "ts": 1742522672507560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424342, "dur": 2, "args": {"External id": 8532, "cbid": 211, "correlation": 8532}}, {"ph": "s", "id": 8532, "pid": 494, "tid": 494, "ts": 1742522672424342, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672507564, "dur": 1, "args": {"External id": 8550, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8550, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.015151516, "warps per SM": 0.030303031, "grid": [2, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8550, "pid": 0, "tid": 7, "ts": 1742522672507564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424358, "dur": 3, "args": {"External id": 8550, "cbid": 211, "correlation": 8550}}, {"ph": "s", "id": 8550, "pid": 494, "tid": 494, "ts": 1742522672424358, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AUnaryFunctor<long, long, long, at::native::binary_internal::MulFunctor<long> >, at::detail::Array<char*, 2> >(int, at::native::AUnaryFunctor<long, long, long, at::native::binary_internal::MulFunctor<long> >, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507567, "dur": 1, "args": {"External id": 8560, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8560, "registers per thread": 26, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8560, "pid": 0, "tid": 7, "ts": 1742522672507567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424380, "dur": 4, "args": {"External id": 8560, "cbid": 211, "correlation": 8560}}, {"ph": "s", "id": 8560, "pid": 494, "tid": 494, "ts": 1742522672424380, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672507570, "dur": 1, "args": {"External id": 8570, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8570, "registers per thread": 30, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8570, "pid": 0, "tid": 7, "ts": 1742522672507570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424391, "dur": 4, "args": {"External id": 8570, "cbid": 211, "correlation": 8570}}, {"ph": "s", "id": 8570, "pid": 494, "tid": 494, "ts": 1742522672424391, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672507575, "dur": 1, "args": {"External id": 8591, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8591, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.015151516, "warps per SM": 0.030303031, "grid": [2, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8591, "pid": 0, "tid": 7, "ts": 1742522672507575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424413, "dur": 3, "args": {"External id": 8591, "cbid": 211, "correlation": 8591}}, {"ph": "s", "id": 8591, "pid": 494, "tid": 494, "ts": 1742522672424413, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AUnaryFunctor<long, long, long, at::native::binary_internal::MulFunctor<long> >, at::detail::Array<char*, 2> >(int, at::native::AUnaryFunctor<long, long, long, at::native::binary_internal::MulFunctor<long> >, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507578, "dur": 1, "args": {"External id": 8601, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8601, "registers per thread": 26, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8601, "pid": 0, "tid": 7, "ts": 1742522672507578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424422, "dur": 2, "args": {"External id": 8601, "cbid": 211, "correlation": 8601}}, {"ph": "s", "id": 8601, "pid": 494, "tid": 494, "ts": 1742522672424422, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AUnaryFunctor<long, long, long, at::native::binary_internal::MulFunctor<long> >, at::detail::Array<char*, 2> >(int, at::native::AUnaryFunctor<long, long, long, at::native::binary_internal::MulFunctor<long> >, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507581, "dur": 1, "args": {"External id": 8611, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8611, "registers per thread": 26, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8611, "pid": 0, "tid": 7, "ts": 1742522672507581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424432, "dur": 2, "args": {"External id": 8611, "cbid": 211, "correlation": 8611}}, {"ph": "s", "id": 8611, "pid": 494, "tid": 494, "ts": 1742522672424432, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>, TrivialOffsetCalculator<2, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<2>, at::native::memory::StoreWithCast<1> >(int, at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>, TrivialOffsetCalculator<2, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<2>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672507585, "dur": 2, "args": {"External id": 8621, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8621, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8621, "pid": 0, "tid": 7, "ts": 1742522672507585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424440, "dur": 2, "args": {"External id": 8621, "cbid": 211, "correlation": 8621}}, {"ph": "s", "id": 8621, "pid": 494, "tid": 494, "ts": 1742522672424440, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507589, "dur": 2, "args": {"External id": 8634, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8634, "registers per thread": 18, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8634, "pid": 0, "tid": 7, "ts": 1742522672507589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424459, "dur": 3, "args": {"External id": 8634, "cbid": 211, "correlation": 8634}}, {"ph": "s", "id": 8634, "pid": 494, "tid": 494, "ts": 1742522672424459, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672507594, "dur": 1, "args": {"External id": 8645, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8645, "registers per thread": 30, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8645, "pid": 0, "tid": 7, "ts": 1742522672507594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424471, "dur": 2, "args": {"External id": 8645, "cbid": 211, "correlation": 8645}}, {"ph": "s", "id": 8645, "pid": 494, "tid": 494, "ts": 1742522672424471, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::BUnaryFunctor<long, long, long, at::native::binary_internal::div_floor_kernel_cuda(at::TensorIteratorBase&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long, long)#1}>, at::detail::Array<char*, 2> >(int, at::native::BUnaryFunctor<long, long, long, at::native::binary_internal::div_floor_kernel_cuda(at::TensorIteratorBase&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long, long)#1}>, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507597, "dur": 1, "args": {"External id": 8658, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8658, "registers per thread": 33, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8658, "pid": 0, "tid": 7, "ts": 1742522672507597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424499, "dur": 4, "args": {"External id": 8658, "cbid": 211, "correlation": 8658}}, {"ph": "s", "id": 8658, "pid": 494, "tid": 494, "ts": 1742522672424499, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507601, "dur": 3, "args": {"External id": 8668, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8668, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8668, "pid": 0, "tid": 7, "ts": 1742522672507601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424514, "dur": 3, "args": {"External id": 8668, "cbid": 211, "correlation": 8668}}, {"ph": "s", "id": 8668, "pid": 494, "tid": 494, "ts": 1742522672424514, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AUnaryFunctor<int, int, int, at::native::binary_internal::MulFunctor<int> >, at::detail::Array<char*, 2> >(int, at::native::AUnaryFunctor<int, int, int, at::native::binary_internal::MulFunctor<int> >, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507607, "dur": 1, "args": {"External id": 8678, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8678, "registers per thread": 20, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8678, "pid": 0, "tid": 7, "ts": 1742522672507607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424527, "dur": 3, "args": {"External id": 8678, "cbid": 211, "correlation": 8678}}, {"ph": "s", "id": 8678, "pid": 494, "tid": 494, "ts": 1742522672424527, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::BUnaryFunctor<long, long, long, at::native::remainder_kernel_cuda(at::TensorIteratorBase&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long, long)#1}>, at::detail::Array<char*, 2> >(int, at::native::BUnaryFunctor<long, long, long, at::native::remainder_kernel_cuda(at::TensorIteratorBase&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long, long)#1}>, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507611, "dur": 1, "args": {"External id": 8688, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8688, "registers per thread": 36, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8688, "pid": 0, "tid": 7, "ts": 1742522672507611, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424549, "dur": 5, "args": {"External id": 8688, "cbid": 211, "correlation": 8688}}, {"ph": "s", "id": 8688, "pid": 494, "tid": 494, "ts": 1742522672424549, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>, TrivialOffsetCalculator<2, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<2>, at::native::memory::StoreWithCast<1> >(int, at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>, TrivialOffsetCalculator<2, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<2>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672507615, "dur": 2, "args": {"External id": 8698, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8698, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8698, "pid": 0, "tid": 7, "ts": 1742522672507615, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424560, "dur": 3, "args": {"External id": 8698, "cbid": 211, "correlation": 8698}}, {"ph": "s", "id": 8698, "pid": 494, "tid": 494, "ts": 1742522672424560, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507620, "dur": 1, "args": {"External id": 8710, "device": 0, "context": 1, "stream": 7, "correlation": 8710, "bytes": 2048, "memory bandwidth (GB/s)": 1.453513129879347}}, {"ph": "f", "id": 8710, "pid": 0, "tid": 7, "ts": 1742522672507620, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672424585, "dur": 8, "args": {"External id": 8710, "cbid": 41, "correlation": 8710}}, {"ph": "s", "id": 8710, "pid": 494, "tid": 494, "ts": 1742522672424585, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::CatArrayBatchedCopy<c10::BFloat16, unsigned int, 3, 128, 1>(c10::BFloat16*, at::native::(anonymous namespace)::CatArrInputTensorMetadata<c10::BFloat16, unsigned int, 128, 1>, at::native::(anonymous namespace)::TensorSizeStride<unsigned int, 4u>, int, unsigned int)", "pid": 0, "tid": 7, "ts": 1742522672507624, "dur": 8, "args": {"External id": 8727, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8727, "registers per thread": 28, "shared memory": 0, "blocks per SM": 4, "warps per SM": 64, "grid": [264, 2, 1], "block": [512, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 8727, "pid": 0, "tid": 7, "ts": 1742522672507624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424631, "dur": 5, "args": {"External id": 8727, "cbid": 211, "correlation": 8727}}, {"ph": "s", "id": 8727, "pid": 494, "tid": 494, "ts": 1742522672424631, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctorOnOther_add<int>, at::detail::Array<char*, 2> >(int, at::native::CUDAFunctorOnOther_add<int>, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507635, "dur": 1, "args": {"External id": 8740, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8740, "registers per thread": 20, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8740, "pid": 0, "tid": 7, "ts": 1742522672507635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424661, "dur": 3, "args": {"External id": 8740, "cbid": 211, "correlation": 8740}}, {"ph": "s", "id": 8740, "pid": 494, "tid": 494, "ts": 1742522672424661, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672507638, "dur": 1, "args": {"External id": 8758, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8758, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.015151516, "warps per SM": 0.030303031, "grid": [2, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8758, "pid": 0, "tid": 7, "ts": 1742522672507638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424678, "dur": 3, "args": {"External id": 8758, "cbid": 211, "correlation": 8758}}, {"ph": "s", "id": 8758, "pid": 494, "tid": 494, "ts": 1742522672424678, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AUnaryFunctor<long, long, long, at::native::binary_internal::MulFunctor<long> >, at::detail::Array<char*, 2> >(int, at::native::AUnaryFunctor<long, long, long, at::native::binary_internal::MulFunctor<long> >, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507642, "dur": 1, "args": {"External id": 8768, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8768, "registers per thread": 26, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8768, "pid": 0, "tid": 7, "ts": 1742522672507642, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424692, "dur": 3, "args": {"External id": 8768, "cbid": 211, "correlation": 8768}}, {"ph": "s", "id": 8768, "pid": 494, "tid": 494, "ts": 1742522672424692, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>, TrivialOffsetCalculator<2, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<2>, at::native::memory::StoreWithCast<1> >(int, at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>, TrivialOffsetCalculator<2, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<2>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672507646, "dur": 1, "args": {"External id": 8778, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8778, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8778, "pid": 0, "tid": 7, "ts": 1742522672507646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424703, "dur": 2, "args": {"External id": 8778, "cbid": 211, "correlation": 8778}}, {"ph": "s", "id": 8778, "pid": 494, "tid": 494, "ts": 1742522672424703, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672507650, "dur": 1, "args": {"External id": 8799, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8799, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.015151516, "grid": [1, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8799, "pid": 0, "tid": 7, "ts": 1742522672507650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424725, "dur": 3, "args": {"External id": 8799, "cbid": 211, "correlation": 8799}}, {"ph": "s", "id": 8799, "pid": 494, "tid": 494, "ts": 1742522672424725, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::CUDAFunctor_add<long> >(at::TensorIteratorBase&, at::native::CUDAFunctor_add<long> const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::CUDAFunctor_add<long> >(at::TensorIteratorBase&, at::native::CUDAFunctor_add<long> const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507654, "dur": 1, "args": {"External id": 8812, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8812, "registers per thread": 18, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8812, "pid": 0, "tid": 7, "ts": 1742522672507654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424741, "dur": 4, "args": {"External id": 8812, "cbid": 211, "correlation": 8812}}, {"ph": "s", "id": 8812, "pid": 494, "tid": 494, "ts": 1742522672424741, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctorOnSelf_add<long>, at::detail::Array<char*, 2> >(int, at::native::CUDAFunctorOnSelf_add<long>, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507658, "dur": 1, "args": {"External id": 8822, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8822, "registers per thread": 22, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8822, "pid": 0, "tid": 7, "ts": 1742522672507658, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424757, "dur": 3, "args": {"External id": 8822, "cbid": 211, "correlation": 8822}}, {"ph": "s", "id": 8822, "pid": 494, "tid": 494, "ts": 1742522672424757, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctorOnSelf_add<long>, at::detail::Array<char*, 2> >(int, at::native::CUDAFunctorOnSelf_add<long>, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507663, "dur": 1, "args": {"External id": 8832, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8832, "registers per thread": 22, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8832, "pid": 0, "tid": 7, "ts": 1742522672507663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424768, "dur": 2, "args": {"External id": 8832, "cbid": 211, "correlation": 8832}}, {"ph": "s", "id": 8832, "pid": 494, "tid": 494, "ts": 1742522672424768, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672507667, "dur": 1, "args": {"External id": 8850, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8850, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.015151516, "warps per SM": 0.030303031, "grid": [2, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8850, "pid": 0, "tid": 7, "ts": 1742522672507667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424780, "dur": 2, "args": {"External id": 8850, "cbid": 211, "correlation": 8850}}, {"ph": "s", "id": 8850, "pid": 494, "tid": 494, "ts": 1742522672424780, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::AUnaryFunctor<long, long, long, at::native::binary_internal::MulFunctor<long> >, at::detail::Array<char*, 2> >(int, at::native::AUnaryFunctor<long, long, long, at::native::binary_internal::MulFunctor<long> >, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672507670, "dur": 1, "args": {"External id": 8860, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8860, "registers per thread": 26, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8860, "pid": 0, "tid": 7, "ts": 1742522672507670, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424792, "dur": 2, "args": {"External id": 8860, "cbid": 211, "correlation": 8860}}, {"ph": "s", "id": 8860, "pid": 494, "tid": 494, "ts": 1742522672424792, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<long>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672507673, "dur": 1, "args": {"External id": 8870, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8870, "registers per thread": 30, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8870, "pid": 0, "tid": 7, "ts": 1742522672507673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424801, "dur": 2, "args": {"External id": 8870, "cbid": 211, "correlation": 8870}}, {"ph": "s", "id": 8870, "pid": 494, "tid": 494, "ts": 1742522672424801, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<long>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<long>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672507677, "dur": 1, "args": {"External id": 8907, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8907, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8907, "pid": 0, "tid": 7, "ts": 1742522672507677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424842, "dur": 3, "args": {"External id": 8907, "cbid": 211, "correlation": 8907}}, {"ph": "s", "id": 8907, "pid": 494, "tid": 494, "ts": 1742522672424842, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507680, "dur": 2, "args": {"External id": 8919, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8919, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 8919, "pid": 0, "tid": 7, "ts": 1742522672507680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424860, "dur": 3, "args": {"External id": 8919, "cbid": 211, "correlation": 8919}}, {"ph": "s", "id": 8919, "pid": 494, "tid": 494, "ts": 1742522672424860, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507686, "dur": 1, "args": {"External id": 8925, "device": 0, "context": 1, "stream": 7, "correlation": 8925, "bytes": 2048, "memory bandwidth (GB/s)": 1.6828266228430566}}, {"ph": "f", "id": 8925, "pid": 0, "tid": 7, "ts": 1742522672507686, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672424876, "dur": 6, "args": {"External id": 8925, "cbid": 41, "correlation": 8925}}, {"ph": "s", "id": 8925, "pid": 494, "tid": 494, "ts": 1742522672424876, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507689, "dur": 6, "args": {"External id": 8943, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8943, "registers per thread": 18, "shared memory": 0, "blocks per SM": 54.303032, "warps per SM": 217.21213, "grid": [7168, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 8943, "pid": 0, "tid": 7, "ts": 1742522672507689, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424916, "dur": 3, "args": {"External id": 8943, "cbid": 211, "correlation": 8943}}, {"ph": "s", "id": 8943, "pid": 494, "tid": 494, "ts": 1742522672424916, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<2> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<2> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507698, "dur": 8, "args": {"External id": 8964, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 8964, "registers per thread": 32, "shared memory": 0, "blocks per SM": 27.151516, "warps per SM": 108.606064, "grid": [3584, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 8964, "pid": 0, "tid": 7, "ts": 1742522672507698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672424947, "dur": 5, "args": {"External id": 8964, "cbid": 211, "correlation": 8964}}, {"ph": "s", "id": 8964, "pid": 494, "tid": 494, "ts": 1742522672424947, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507709, "dur": 3, "args": {"External id": 8970, "device": 0, "context": 1, "stream": 7, "correlation": 8970, "bytes": 3670016, "memory bandwidth (GB/s)": 1103.1006913134956}}, {"ph": "f", "id": 8970, "pid": 0, "tid": 7, "ts": 1742522672507709, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672424964, "dur": 7, "args": {"External id": 8970, "cbid": 41, "correlation": 8970}}, {"ph": "s", "id": 8970, "pid": 494, "tid": 494, "ts": 1742522672424964, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507715, "dur": 1, "args": {"External id": 9911, "device": 0, "context": 1, "stream": 7, "correlation": 9911, "bytes": 1024, "memory bandwidth (GB/s)": 0.7798933739527799}}, {"ph": "f", "id": 9911, "pid": 0, "tid": 7, "ts": 1742522672507715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672425395, "dur": 8, "args": {"External id": 9911, "cbid": 41, "correlation": 9911}}, {"ph": "s", "id": 9911, "pid": 494, "tid": 494, "ts": 1742522672425395, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<long>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<long>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672507719, "dur": 1, "args": {"External id": 9925, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 9925, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 9925, "pid": 0, "tid": 7, "ts": 1742522672507719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672425420, "dur": 4, "args": {"External id": 9925, "cbid": 211, "correlation": 9925}}, {"ph": "s", "id": 9925, "pid": 494, "tid": 494, "ts": 1742522672425420, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507722, "dur": 1, "args": {"External id": 9936, "device": 0, "context": 1, "stream": 7, "correlation": 9936, "bytes": 2048, "memory bandwidth (GB/s)": 1.4872912127814089}}, {"ph": "f", "id": 9936, "pid": 0, "tid": 7, "ts": 1742522672507722, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672425436, "dur": 4, "args": {"External id": 9936, "cbid": 41, "correlation": 9936}}, {"ph": "s", "id": 9936, "pid": 494, "tid": 494, "ts": 1742522672425436, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507725, "dur": 1, "args": {"External id": 9953, "device": 0, "context": 1, "stream": 7, "correlation": 9953, "bytes": 2040, "memory bandwidth (GB/s)": 1.516728624535316}}, {"ph": "f", "id": 9953, "pid": 0, "tid": 7, "ts": 1742522672507725, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672425456, "dur": 4, "args": {"External id": 9953, "cbid": 41, "correlation": 9953}}, {"ph": "s", "id": 9953, "pid": 494, "tid": 494, "ts": 1742522672425456, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672507728, "dur": 1, "args": {"External id": 9959, "device": 0, "context": 1, "stream": 7, "correlation": 9959, "bytes": 2040, "memory bandwidth (GB/s)": 1.7215189873417722}}, {"ph": "f", "id": 9959, "pid": 0, "tid": 7, "ts": 1742522672507728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672425466, "dur": 3, "args": {"External id": 9959, "cbid": 41, "correlation": 9959}}, {"ph": "s", "id": 9959, "pid": 494, "tid": 494, "ts": 1742522672425466, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::index_elementwise_kernel<128, 4, at::native::gpu_index_kernel<at::native::index_put_kernel_impl<at::native::OpaqueType<8> >(at::TensorIterator&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_put_kernel_impl<at::native::OpaqueType<8> >(at::TensorIterator&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1}>(long, at::native::gpu_index_kernel<at::native::index_put_kernel_impl<at::native::OpaqueType<8> >(at::TensorIterator&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_put_kernel_impl<at::native::OpaqueType<8> >(at::TensorIterator&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672507732, "dur": 3, "args": {"External id": 9968, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 9968, "registers per thread": 40, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 9968, "pid": 0, "tid": 7, "ts": 1742522672507732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672425495, "dur": 4, "args": {"External id": 9968, "cbid": 211, "correlation": 9968}}, {"ph": "s", "id": 9968, "pid": 494, "tid": 494, "ts": 1742522672425495, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::indexSelectLargeIndex<c10::BFloat16, long, unsigned int, 2, 2, -2, true>(at::cuda::detail::TensorInfo<c10::BFloat16, unsigned int>, at::cuda::detail::TensorInfo<c10::BFloat16, unsigned int>, at::cuda::detail::TensorInfo<long, unsigned int>, int, int, unsigned int, unsigned int, long)", "pid": 0, "tid": 7, "ts": 1742522672507737, "dur": 12, "args": {"External id": 9990, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 9990, "registers per thread": 32, "shared memory": 0, "blocks per SM": 8, "warps per SM": 32, "grid": [1056, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 50}}, {"ph": "f", "id": 9990, "pid": 0, "tid": 7, "ts": 1742522672507737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672425548, "dur": 4, "args": {"External id": 9990, "cbid": 211, "correlation": 9990}}, {"ph": "s", "id": 9990, "pid": 494, "tid": 494, "ts": 1742522672425548, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672507753, "dur": 3, "args": {"External id": 10006, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10006, "registers per thread": 84, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 10006, "pid": 0, "tid": 7, "ts": 1742522672507753, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_driver", "name": "cuLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672425637, "dur": 4, "args": {"External id": 10006, "cbid": 307, "correlation": 10006}}, {"ph": "s", "id": 10006, "pid": 494, "tid": 494, "ts": 1742522672425637, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672507758, "dur": 3, "args": {"External id": 10020, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10020, "registers per thread": 84, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 10020, "pid": 0, "tid": 7, "ts": 1742522672507758, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_driver", "name": "cuLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672425703, "dur": 4, "args": {"External id": 10020, "cbid": 307, "correlation": 10020}}, {"ph": "s", "id": 10020, "pid": 494, "tid": 494, "ts": 1742522672425703, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::CatArrayBatchedCopy<c10::BFloat16, unsigned int, 2, 128, 1>(c10::BFloat16*, at::native::(anonymous namespace)::CatArrInputTensorMetadata<c10::BFloat16, unsigned int, 128, 1>, at::native::(anonymous namespace)::TensorSizeStride<unsigned int, 4u>, int, unsigned int)", "pid": 0, "tid": 7, "ts": 1742522672507764, "dur": 7, "args": {"External id": 10029, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10029, "registers per thread": 21, "shared memory": 0, "blocks per SM": 4, "warps per SM": 64, "grid": [264, 2, 1], "block": [512, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 10029, "pid": 0, "tid": 7, "ts": 1742522672507764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672425726, "dur": 4, "args": {"External id": 10029, "cbid": 211, "correlation": 10029}}, {"ph": "s", "id": 10029, "pid": 494, "tid": 494, "ts": 1742522672425726, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncGetAttributes", "pid": 494, "tid": 494, "ts": 1742522672425780, "dur": 4, "args": {"External id": 10042, "cbid": 15, "correlation": 10042}}, {"ph": "f", "id": 10042, "pid": 494, "tid": 494, "ts": 1742522672425780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672425794, "dur": 1, "args": {"External id": 10043, "cbid": 273, "correlation": 10043}}, {"ph": "f", "id": 10043, "pid": 494, "tid": 494, "ts": 1742522672425794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672425796, "dur": 0, "args": {"External id": 10044, "cbid": 273, "correlation": 10044}}, {"ph": "f", "id": 10044, "pid": 494, "tid": 494, "ts": 1742522672425796, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672425796, "dur": 0, "args": {"External id": 10045, "cbid": 273, "correlation": 10045}}, {"ph": "f", "id": 10045, "pid": 494, "tid": 494, "ts": 1742522672425796, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "sm90_xmma_gemm_bf16bf16_bf16f32_f32_tn_n_tilesize128x128x64_warpgroupsize1x1x1_execute_segment_k_off_kernel__5x_cublas", "pid": 0, "tid": 7, "ts": 1742522672507773, "dur": 93, "args": {"External id": 10046, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10046, "registers per thread": 168, "shared memory": 231424, "blocks per SM": 0.8484849, "warps per SM": 10.181818, "grid": [56, 2, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10046, "pid": 0, "tid": 7, "ts": 1742522672507773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672425797, "dur": 6, "args": {"External id": 10046, "cbid": 430, "correlation": 10046}}, {"ph": "s", "id": 10046, "pid": 494, "tid": 494, "ts": 1742522672425797, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672507868, "dur": 5, "args": {"External id": 10071, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10071, "registers per thread": 86, "shared memory": 256, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 10071, "pid": 0, "tid": 7, "ts": 1742522672507868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_driver", "name": "cuLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672425899, "dur": 6, "args": {"External id": 10071, "cbid": 307, "correlation": 10071}}, {"ph": "s", "id": 10071, "pid": 494, "tid": 494, "ts": 1742522672425899, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426070, "dur": 2, "args": {"External id": 10083, "cbid": 468, "correlation": 10083}}, {"ph": "f", "id": 10083, "pid": 494, "tid": 494, "ts": 1742522672426070, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426076, "dur": 0, "args": {"External id": 10085, "cbid": 468, "correlation": 10085}}, {"ph": "f", "id": 10085, "pid": 494, "tid": 494, "ts": 1742522672426076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426076, "dur": 0, "args": {"External id": 10087, "cbid": 468, "correlation": 10087}}, {"ph": "f", "id": 10087, "pid": 494, "tid": 494, "ts": 1742522672426076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426077, "dur": 0, "args": {"External id": 10089, "cbid": 468, "correlation": 10089}}, {"ph": "f", "id": 10089, "pid": 494, "tid": 494, "ts": 1742522672426077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672426077, "dur": 3, "args": {"External id": 10091, "cbid": 273, "correlation": 10091}}, {"ph": "f", "id": 10091, "pid": 494, "tid": 494, "ts": 1742522672426077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<2112u, 7168u, 128u, 32u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672507874, "dur": 13, "args": {"External id": 10092, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10092, "registers per thread": 168, "shared memory": 176480, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10092, "pid": 0, "tid": 7, "ts": 1742522672507874, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672426082, "dur": 8, "args": {"External id": 10092, "cbid": 430, "correlation": 10092}}, {"ph": "s", "id": 10092, "pid": 494, "tid": 494, "ts": 1742522672426082, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672507890, "dur": 2, "args": {"External id": 10101, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10101, "registers per thread": 32, "shared memory": 16, "blocks per SM": 3.878788, "warps per SM": 15.515152, "grid": [512, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 10101, "pid": 0, "tid": 7, "ts": 1742522672507890, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_driver", "name": "cuLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426155, "dur": 6, "args": {"External id": 10101, "cbid": 307, "correlation": 10101}}, {"ph": "s", "id": 10101, "pid": 494, "tid": 494, "ts": 1742522672426155, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672507895, "dur": 2, "args": {"External id": 10124, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10124, "registers per thread": 30, "shared memory": 0, "blocks per SM": 5.818182, "warps per SM": 23.272728, "grid": [768, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 36}}, {"ph": "f", "id": 10124, "pid": 0, "tid": 7, "ts": 1742522672507895, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426221, "dur": 5, "args": {"External id": 10124, "cbid": 211, "correlation": 10124}}, {"ph": "s", "id": 10124, "pid": 494, "tid": 494, "ts": 1742522672426221, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426312, "dur": 0, "args": {"External id": 10136, "cbid": 468, "correlation": 10136}}, {"ph": "f", "id": 10136, "pid": 494, "tid": 494, "ts": 1742522672426312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426313, "dur": 0, "args": {"External id": 10138, "cbid": 468, "correlation": 10138}}, {"ph": "f", "id": 10138, "pid": 494, "tid": 494, "ts": 1742522672426313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426314, "dur": 0, "args": {"External id": 10140, "cbid": 468, "correlation": 10140}}, {"ph": "f", "id": 10140, "pid": 494, "tid": 494, "ts": 1742522672426314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426314, "dur": 0, "args": {"External id": 10142, "cbid": 468, "correlation": 10142}}, {"ph": "f", "id": 10142, "pid": 494, "tid": 494, "ts": 1742522672426314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672426315, "dur": 3, "args": {"External id": 10144, "cbid": 273, "correlation": 10144}}, {"ph": "f", "id": 10144, "pid": 494, "tid": 494, "ts": 1742522672426315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<24576u, 1536u, 128u, 128u, 128u, 5u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672507898, "dur": 25, "args": {"External id": 10145, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10145, "registers per thread": 168, "shared memory": 199296, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10145, "pid": 0, "tid": 7, "ts": 1742522672507898, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672426319, "dur": 6, "args": {"External id": 10145, "cbid": 430, "correlation": 10145}}, {"ph": "s", "id": 10145, "pid": 494, "tid": 494, "ts": 1742522672426319, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672426397, "dur": 2, "args": {"External id": 10157, "cbid": 251, "correlation": 10157}}, {"ph": "f", "id": 10157, "pid": 494, "tid": 494, "ts": 1742522672426397, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672426401, "dur": 0, "args": {"External id": 10158, "cbid": 273, "correlation": 10158}}, {"ph": "f", "id": 10158, "pid": 494, "tid": 494, "ts": 1742522672426401, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_nn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672507926, "dur": 30, "args": {"External id": 10159, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10159, "registers per thread": 232, "shared memory": 98304, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [8, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10159, "pid": 0, "tid": 7, "ts": 1742522672507926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426403, "dur": 6, "args": {"External id": 10159, "cbid": 211, "correlation": 10159}}, {"ph": "s", "id": 10159, "pid": 494, "tid": 494, "ts": 1742522672426403, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void vllm::rotary_embedding_with_kv_cache_kernel<c10::BFloat16, false, true, false, 32>(long const*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, c10::BFloat16*, long const*, float const*, int, int, int, int, int, int, long, long, long, long, long, long, long, long, long)", "pid": 0, "tid": 7, "ts": 1742522672507958, "dur": 9, "args": {"External id": 10166, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10166, "registers per thread": 32, "shared memory": 256, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 10166, "pid": 0, "tid": 7, "ts": 1742522672507958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426435, "dur": 5, "args": {"External id": 10166, "cbid": 211, "correlation": 10166}}, {"ph": "s", "id": 10166, "pid": 494, "tid": 494, "ts": 1742522672426435, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "get_mla_metadata_kernel(Mla_metadata_params)", "pid": 0, "tid": 7, "ts": 1742522672507970, "dur": 11, "args": {"External id": 10187, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10187, "registers per thread": 32, "shared memory": 32768, "blocks per SM": 0.007575758, "warps per SM": 0.007575758, "grid": [1, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10187, "pid": 0, "tid": 7, "ts": 1742522672507970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426492, "dur": 4, "args": {"External id": 10187, "cbid": 211, "correlation": 10187}}, {"ph": "s", "id": 10187, "pid": 494, "tid": 494, "ts": 1742522672426492, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672426531, "dur": 1, "args": {"External id": 10222, "cbid": 273, "correlation": 10222}}, {"ph": "f", "id": 10222, "pid": 494, "tid": 494, "ts": 1742522672426531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_kernel<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t>, true, flash::SharedStorageMLA<Flash_fwd_kernel_traits_mla<576, 64, 64, 8, cutlass::bfloat16_t, 512, true, 0, cutlass::bfloat16_t, cutlass::bfloat16_t> > >(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672507985, "dur": 467, "args": {"External id": 10223, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10223, "registers per thread": 248, "shared memory": 230400, "blocks per SM": 1, "warps per SM": 8, "grid": [4, 1, 33], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10223, "pid": 0, "tid": 7, "ts": 1742522672507985, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426532, "dur": 3, "args": {"External id": 10223, "cbid": 211, "correlation": 10223}}, {"ph": "s", "id": 10223, "pid": 494, "tid": 494, "ts": 1742522672426532, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void flash::flash_fwd_splitkv_mla_combine_kernel<cutlass::bfloat16_t, float, long, 512, 64>(Flash_fwd_mla_params)", "pid": 0, "tid": 7, "ts": 1742522672508455, "dur": 25, "args": {"External id": 10225, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10225, "registers per thread": 48, "shared memory": 256, "blocks per SM": 248.24243, "warps per SM": 992.9697, "grid": [32768, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 63}}, {"ph": "f", "id": 10225, "pid": 0, "tid": 7, "ts": 1742522672508455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426537, "dur": 3, "args": {"External id": 10225, "cbid": 211, "correlation": 10225}}, {"ph": "s", "id": 10225, "pid": 494, "tid": 494, "ts": 1742522672426537, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672426580, "dur": 1, "args": {"External id": 10237, "cbid": 251, "correlation": 10237}}, {"ph": "f", "id": 10237, "pid": 494, "tid": 494, "ts": 1742522672426580, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672426583, "dur": 0, "args": {"External id": 10238, "cbid": 273, "correlation": 10238}}, {"ph": "f", "id": 10238, "pid": 494, "tid": 494, "ts": 1742522672426583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8>(cutlass_80_tensorop_bf16_s16816gemm_bf16_128x128_64x3_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672508483, "dur": 25, "args": {"External id": 10239, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10239, "registers per thread": 228, "shared memory": 98304, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [2, 1, 128], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10239, "pid": 0, "tid": 7, "ts": 1742522672508483, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426584, "dur": 4, "args": {"External id": 10239, "cbid": 211, "correlation": 10239}}, {"ph": "s", "id": 10239, "pid": 494, "tid": 494, "ts": 1742522672426584, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672508511, "dur": 9, "args": {"External id": 10256, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10256, "registers per thread": 30, "shared memory": 0, "blocks per SM": 62.060608, "warps per SM": 248.24243, "grid": [8192, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 10256, "pid": 0, "tid": 7, "ts": 1742522672508511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426619, "dur": 3, "args": {"External id": 10256, "cbid": 211, "correlation": 10256}}, {"ph": "s", "id": 10256, "pid": 494, "tid": 494, "ts": 1742522672426619, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426714, "dur": 0, "args": {"External id": 10268, "cbid": 468, "correlation": 10268}}, {"ph": "f", "id": 10268, "pid": 494, "tid": 494, "ts": 1742522672426714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426715, "dur": 0, "args": {"External id": 10270, "cbid": 468, "correlation": 10270}}, {"ph": "f", "id": 10270, "pid": 494, "tid": 494, "ts": 1742522672426715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426716, "dur": 0, "args": {"External id": 10272, "cbid": 468, "correlation": 10272}}, {"ph": "f", "id": 10272, "pid": 494, "tid": 494, "ts": 1742522672426716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672426716, "dur": 0, "args": {"External id": 10274, "cbid": 468, "correlation": 10274}}, {"ph": "f", "id": 10274, "pid": 494, "tid": 494, "ts": 1742522672426716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672426717, "dur": 2, "args": {"External id": 10276, "cbid": 273, "correlation": 10276}}, {"ph": "f", "id": 10276, "pid": 494, "tid": 494, "ts": 1742522672426717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 16384u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672508522, "dur": 55, "args": {"External id": 10277, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10277, "registers per thread": 168, "shared memory": 217184, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10277, "pid": 0, "tid": 7, "ts": 1742522672508522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672426720, "dur": 5, "args": {"External id": 10277, "cbid": 430, "correlation": 10277}}, {"ph": "s", "id": 10277, "pid": 494, "tid": 494, "ts": 1742522672426720, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672508579, "dur": 5, "args": {"External id": 10299, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10299, "registers per thread": 84, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 10299, "pid": 0, "tid": 7, "ts": 1742522672508579, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_driver", "name": "cuLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426814, "dur": 6, "args": {"External id": 10299, "cbid": 307, "correlation": 10299}}, {"ph": "s", "id": 10299, "pid": 494, "tid": 494, "ts": 1742522672426814, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags", "pid": 494, "tid": 494, "ts": 1742522672426869, "dur": 2, "args": {"External id": 10309, "cbid": 251, "correlation": 10309}}, {"ph": "f", "id": 10309, "pid": 494, "tid": 494, "ts": 1742522672426869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672426872, "dur": 0, "args": {"External id": 10310, "cbid": 273, "correlation": 10310}}, {"ph": "f", "id": 10310, "pid": 494, "tid": 494, "ts": 1742522672426872, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void cutlass::Kernel<cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8>(cutlass_80_tensorop_s16816gemm_bf16_128x64_64x4_tn_align8::Params)", "pid": 0, "tid": 7, "ts": 1742522672508587, "dur": 8, "args": {"External id": 10311, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10311, "registers per thread": 150, "shared memory": 98304, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [8, 1, 16], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10311, "pid": 0, "tid": 7, "ts": 1742522672508587, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426873, "dur": 5, "args": {"External id": 10311, "cbid": 211, "correlation": 10311}}, {"ph": "s", "id": 10311, "pid": 494, "tid": 494, "ts": 1742522672426873, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void splitKreduce_kernel<32, 16, int, float, float, float, float, true, false, false>(cublasSplitKParams<float>, float const*, float const*, float*, float const*, float const*, float const*, float const*, float*, void*, long, float*, int*)", "pid": 0, "tid": 7, "ts": 1742522672508598, "dur": 2, "args": {"External id": 10313, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10313, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [8, 16, 1], "block": [32, 16, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 10313, "pid": 0, "tid": 7, "ts": 1742522672508598, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426881, "dur": 4, "args": {"External id": 10313, "cbid": 211, "correlation": 10313}}, {"ph": "s", "id": 10313, "pid": 494, "tid": 494, "ts": 1742522672426881, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamIsCapturing", "pid": 494, "tid": 494, "ts": 1742522672426912, "dur": 1, "args": {"External id": 10322, "cbid": 317, "correlation": 10322}}, {"ph": "f", "id": 10322, "pid": 494, "tid": 494, "ts": 1742522672426912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::normal_kernel<at::CUDAGeneratorImpl*>(at::TensorBase const&, double, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672508604, "dur": 2, "args": {"External id": 10324, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10324, "registers per thread": 40, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 15.515152, "grid": [256, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 10324, "pid": 0, "tid": 7, "ts": 1742522672508604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426915, "dur": 5, "args": {"External id": 10324, "cbid": 211, "correlation": 10324}}, {"ph": "s", "id": 10324, "pid": 494, "tid": 494, "ts": 1742522672426915, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void cuda::top2_sum_gate<32, 8, 256, 8, 4>(float const*, float const*, long*, float*, bool const*, int const*, int const*, int, int, int, float, int, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672508609, "dur": 7, "args": {"External id": 10349, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10349, "registers per thread": 47, "shared memory": 2120, "blocks per SM": 1.939394, "warps per SM": 1.939394, "grid": [256, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 3}}, {"ph": "f", "id": 10349, "pid": 0, "tid": 7, "ts": 1742522672508609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672426968, "dur": 4, "args": {"External id": 10349, "cbid": 211, "correlation": 10349}}, {"ph": "s", "id": 10349, "pid": 494, "tid": 494, "ts": 1742522672426968, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672427031, "dur": 1, "args": {"External id": 10365, "cbid": 147, "correlation": 10365}}, {"ph": "s", "id": 10365, "pid": 494, "tid": 494, "ts": 1742522672427031, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 16, "ts": 1742522672508622, "dur": 202, "args": {"External id": 10406, "queued": 0, "device": 0, "context": 1, "stream": 16, "correlation": 10406, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 10406, "pid": 0, "tid": 16, "ts": 1742522672508622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427058, "dur": 36, "args": {"External id": 10406, "cbid": 430, "correlation": 10406}}, {"ph": "s", "id": 10406, "pid": 494, "tid": 494, "ts": 1742522672427058, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672427112, "dur": 1, "args": {"External id": 10415, "cbid": 147, "correlation": 10415}}, {"ph": "s", "id": 10415, "pid": 494, "tid": 494, "ts": 1742522672427112, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672508827, "dur": 18, "args": {"External id": 10459, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10459, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 10459, "pid": 0, "tid": 7, "ts": 1742522672508827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427146, "dur": 6, "args": {"External id": 10459, "cbid": 430, "correlation": 10459}}, {"ph": "s", "id": 10459, "pid": 494, "tid": 494, "ts": 1742522672427146, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427261, "dur": 0, "args": {"External id": 10469, "cbid": 468, "correlation": 10469}}, {"ph": "f", "id": 10469, "pid": 494, "tid": 494, "ts": 1742522672427261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427263, "dur": 0, "args": {"External id": 10471, "cbid": 468, "correlation": 10471}}, {"ph": "f", "id": 10471, "pid": 494, "tid": 494, "ts": 1742522672427263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427263, "dur": 0, "args": {"External id": 10473, "cbid": 468, "correlation": 10473}}, {"ph": "f", "id": 10473, "pid": 494, "tid": 494, "ts": 1742522672427263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427264, "dur": 0, "args": {"External id": 10475, "cbid": 468, "correlation": 10475}}, {"ph": "f", "id": 10475, "pid": 494, "tid": 494, "ts": 1742522672427264, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672427265, "dur": 2, "args": {"External id": 10477, "cbid": 273, "correlation": 10477}}, {"ph": "f", "id": 10477, "pid": 494, "tid": 494, "ts": 1742522672427265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672508844, "dur": 78, "args": {"External id": 10478, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10478, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10478, "pid": 0, "tid": 7, "ts": 1742522672508844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427268, "dur": 7, "args": {"External id": 10478, "cbid": 430, "correlation": 10478}}, {"ph": "s", "id": 10478, "pid": 494, "tid": 494, "ts": 1742522672427268, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672508925, "dur": 9, "args": {"External id": 10494, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10494, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 10494, "pid": 0, "tid": 7, "ts": 1742522672508925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672427305, "dur": 5, "args": {"External id": 10494, "cbid": 211, "correlation": 10494}}, {"ph": "s", "id": 10494, "pid": 494, "tid": 494, "ts": 1742522672427305, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427387, "dur": 0, "args": {"External id": 10505, "cbid": 468, "correlation": 10505}}, {"ph": "f", "id": 10505, "pid": 494, "tid": 494, "ts": 1742522672427387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427388, "dur": 0, "args": {"External id": 10507, "cbid": 468, "correlation": 10507}}, {"ph": "f", "id": 10507, "pid": 494, "tid": 494, "ts": 1742522672427388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427389, "dur": 0, "args": {"External id": 10509, "cbid": 468, "correlation": 10509}}, {"ph": "f", "id": 10509, "pid": 494, "tid": 494, "ts": 1742522672427389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427389, "dur": 0, "args": {"External id": 10511, "cbid": 468, "correlation": 10511}}, {"ph": "f", "id": 10511, "pid": 494, "tid": 494, "ts": 1742522672427389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672427390, "dur": 2, "args": {"External id": 10513, "cbid": 273, "correlation": 10513}}, {"ph": "f", "id": 10513, "pid": 494, "tid": 494, "ts": 1742522672427390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672508935, "dur": 39, "args": {"External id": 10514, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10514, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10514, "pid": 0, "tid": 7, "ts": 1742522672508935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427393, "dur": 5, "args": {"External id": 10514, "cbid": 430, "correlation": 10514}}, {"ph": "s", "id": 10514, "pid": 494, "tid": 494, "ts": 1742522672427393, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::dispatch_ll<true, 3, 10, 7168>(void*, float*, int*, long*, int*, int*, void*, int*, void*, void const*, long const*, int*, int*, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672508978, "dur": 37, "args": {"External id": 10515, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10515, "registers per thread": 64, "shared memory": 36, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 10515, "pid": 0, "tid": 7, "ts": 1742522672508978, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427405, "dur": 4, "args": {"External id": 10515, "cbid": 430, "correlation": 10515}}, {"ph": "s", "id": 10515, "pid": 494, "tid": 494, "ts": 1742522672427405, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672509017, "dur": 23, "args": {"External id": 10524, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10524, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 10524, "pid": 0, "tid": 7, "ts": 1742522672509017, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427423, "dur": 4, "args": {"External id": 10524, "cbid": 430, "correlation": 10524}}, {"ph": "s", "id": 10524, "pid": 494, "tid": 494, "ts": 1742522672427423, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void cuda::per_token_cast_to_fp8_with_channels<__nv_bfloat16, 128, true, 128, false>(__nv_bfloat16 const*, int, unsigned char*, float*, int, int, int, int, bool, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672509043, "dur": 5, "args": {"External id": 10540, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10540, "registers per thread": 30, "shared memory": 0, "blocks per SM": 27.151516, "warps per SM": 108.606064, "grid": [3584, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 10540, "pid": 0, "tid": 7, "ts": 1742522672509043, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672427463, "dur": 4, "args": {"External id": 10540, "cbid": 211, "correlation": 10540}}, {"ph": "s", "id": 10540, "pid": 494, "tid": 494, "ts": 1742522672427463, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427549, "dur": 0, "args": {"External id": 10552, "cbid": 468, "correlation": 10552}}, {"ph": "f", "id": 10552, "pid": 494, "tid": 494, "ts": 1742522672427549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427551, "dur": 0, "args": {"External id": 10554, "cbid": 468, "correlation": 10554}}, {"ph": "f", "id": 10554, "pid": 494, "tid": 494, "ts": 1742522672427551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427551, "dur": 0, "args": {"External id": 10556, "cbid": 468, "correlation": 10556}}, {"ph": "f", "id": 10556, "pid": 494, "tid": 494, "ts": 1742522672427551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427552, "dur": 0, "args": {"External id": 10558, "cbid": 468, "correlation": 10558}}, {"ph": "f", "id": 10558, "pid": 494, "tid": 494, "ts": 1742522672427552, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672427552, "dur": 2, "args": {"External id": 10560, "cbid": 273, "correlation": 10560}}, {"ph": "f", "id": 10560, "pid": 494, "tid": 494, "ts": 1742522672427552, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 64u, 128u, 8u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672509049, "dur": 21, "args": {"External id": 10561, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10561, "registers per thread": 168, "shared memory": 217440, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10561, "pid": 0, "tid": 7, "ts": 1742522672509049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427556, "dur": 7, "args": {"External id": 10561, "cbid": 430, "correlation": 10561}}, {"ph": "s", "id": 10561, "pid": 494, "tid": 494, "ts": 1742522672427556, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void cuda::swiglu_forward_with_weight_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, false, true, 128>(unsigned char*, __nv_bfloat16 const*, int const*, float const*, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672509074, "dur": 4, "args": {"External id": 10577, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10577, "registers per thread": 32, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 10577, "pid": 0, "tid": 7, "ts": 1742522672509074, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672427590, "dur": 5, "args": {"External id": 10577, "cbid": 211, "correlation": 10577}}, {"ph": "s", "id": 10577, "pid": 494, "tid": 494, "ts": 1742522672427590, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427684, "dur": 0, "args": {"External id": 10589, "cbid": 468, "correlation": 10589}}, {"ph": "f", "id": 10589, "pid": 494, "tid": 494, "ts": 1742522672427684, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427686, "dur": 0, "args": {"External id": 10591, "cbid": 468, "correlation": 10591}}, {"ph": "f", "id": 10591, "pid": 494, "tid": 494, "ts": 1742522672427686, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427686, "dur": 0, "args": {"External id": 10593, "cbid": 468, "correlation": 10593}}, {"ph": "f", "id": 10593, "pid": 494, "tid": 494, "ts": 1742522672427686, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427687, "dur": 0, "args": {"External id": 10595, "cbid": 468, "correlation": 10595}}, {"ph": "f", "id": 10595, "pid": 494, "tid": 494, "ts": 1742522672427687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672427687, "dur": 2, "args": {"External id": 10597, "cbid": 273, "correlation": 10597}}, {"ph": "f", "id": 10597, "pid": 494, "tid": 494, "ts": 1742522672427687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)0>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672509079, "dur": 11, "args": {"External id": 10598, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10598, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10598, "pid": 0, "tid": 7, "ts": 1742522672509079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427691, "dur": 6, "args": {"External id": 10598, "cbid": 430, "correlation": 10598}}, {"ph": "s", "id": 10598, "pid": 494, "tid": 494, "ts": 1742522672427691, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427773, "dur": 0, "args": {"External id": 10608, "cbid": 468, "correlation": 10608}}, {"ph": "f", "id": 10608, "pid": 494, "tid": 494, "ts": 1742522672427773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427774, "dur": 0, "args": {"External id": 10610, "cbid": 468, "correlation": 10610}}, {"ph": "f", "id": 10610, "pid": 494, "tid": 494, "ts": 1742522672427774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427774, "dur": 0, "args": {"External id": 10612, "cbid": 468, "correlation": 10612}}, {"ph": "f", "id": 10612, "pid": 494, "tid": 494, "ts": 1742522672427774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427775, "dur": 0, "args": {"External id": 10614, "cbid": 468, "correlation": 10614}}, {"ph": "f", "id": 10614, "pid": 494, "tid": 494, "ts": 1742522672427775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672427775, "dur": 0, "args": {"External id": 10616, "cbid": 273, "correlation": 10616}}, {"ph": "f", "id": 10616, "pid": 494, "tid": 494, "ts": 1742522672427775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<4096u, 7168u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672509090, "dur": 83, "args": {"External id": 10617, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10617, "registers per thread": 168, "shared memory": 216608, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10617, "pid": 0, "tid": 7, "ts": 1742522672509090, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427777, "dur": 4, "args": {"External id": 10617, "cbid": 430, "correlation": 10617}}, {"ph": "s", "id": 10617, "pid": 494, "tid": 494, "ts": 1742522672427777, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void cuda::batched_swiglu_forward_and_per_token_cast_to_fp8_with_channels_impl<__nv_bfloat16, 128, 512, true, 128>(unsigned char*, __nv_bfloat16 const*, int, int, int, float*, int const*, float, __nv_fp8_interpretation_t)", "pid": 0, "tid": 7, "ts": 1742522672509176, "dur": 8, "args": {"External id": 10633, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10633, "registers per thread": 33, "shared memory": 0, "blocks per SM": 1.939394, "warps per SM": 31.030304, "grid": [256, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 10633, "pid": 0, "tid": 7, "ts": 1742522672509176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672427801, "dur": 4, "args": {"External id": 10633, "cbid": 211, "correlation": 10633}}, {"ph": "s", "id": 10633, "pid": 494, "tid": 494, "ts": 1742522672427801, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427867, "dur": 0, "args": {"External id": 10644, "cbid": 468, "correlation": 10644}}, {"ph": "f", "id": 10644, "pid": 494, "tid": 494, "ts": 1742522672427867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427868, "dur": 0, "args": {"External id": 10646, "cbid": 468, "correlation": 10646}}, {"ph": "f", "id": 10646, "pid": 494, "tid": 494, "ts": 1742522672427868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427869, "dur": 0, "args": {"External id": 10648, "cbid": 468, "correlation": 10648}}, {"ph": "f", "id": 10648, "pid": 494, "tid": 494, "ts": 1742522672427869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "INVALID", "pid": 494, "tid": 494, "ts": 1742522672427869, "dur": 0, "args": {"External id": 10650, "cbid": 468, "correlation": 10650}}, {"ph": "f", "id": 10650, "pid": 494, "tid": 494, "ts": 1742522672427869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672427870, "dur": 0, "args": {"External id": 10652, "cbid": 273, "correlation": 10652}}, {"ph": "f", "id": 10652, "pid": 494, "tid": 494, "ts": 1742522672427870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::gemm::fp8_gemm_kernel<7168u, 2048u, 128u, 112u, 128u, 6u, 128u, 128u, 1u, (dpsk::gemm::GemmType)2>(__nv_bfloat16*, float*, int*, unsigned int, unsigned int, unsigned int, unsigned int, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st, CUtensorMap_st)", "pid": 0, "tid": 7, "ts": 1742522672509185, "dur": 39, "args": {"External id": 10653, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10653, "registers per thread": 168, "shared memory": 216288, "blocks per SM": 1, "warps per SM": 12, "grid": [132, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10653, "pid": 0, "tid": 7, "ts": 1742522672509185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427871, "dur": 4, "args": {"External id": 10653, "cbid": 430, "correlation": 10653}}, {"ph": "s", "id": 10653, "pid": 494, "tid": 494, "ts": 1742522672427871, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 7, "ts": 1742522672509227, "dur": 154, "args": {"External id": 10654, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10654, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 10654, "pid": 0, "tid": 7, "ts": 1742522672509227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427881, "dur": 4, "args": {"External id": 10654, "cbid": 430, "correlation": 10654}}, {"ph": "s", "id": 10654, "pid": 494, "tid": 494, "ts": 1742522672427881, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672427895, "dur": 1, "args": {"External id": 10663, "cbid": 147, "correlation": 10663}}, {"ph": "s", "id": 10663, "pid": 494, "tid": 494, "ts": 1742522672427895, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void dpsk::ep::internode::combine_ll<3, 10, 7168, 9>(void*, void*, int*, void*, void const*, long const*, float const*, int const*, long const*, int*, int, int*, int, int, int, int, int, int, int, int*, int)", "pid": 0, "tid": 16, "ts": 1742522672509385, "dur": 362, "args": {"External id": 10676, "queued": 0, "device": 0, "context": 1, "stream": 16, "correlation": 10676, "registers per thread": 64, "shared memory": 0, "blocks per SM": 0.6515151, "warps per SM": 19.545454, "grid": [86, 1, 1], "block": [960, 1, 1], "est. achieved occupancy %": 31}}, {"ph": "f", "id": 10676, "pid": 0, "tid": 16, "ts": 1742522672509385, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672427904, "dur": 5, "args": {"External id": 10676, "cbid": 430, "correlation": 10676}}, {"ph": "s", "id": 10676, "pid": 494, "tid": 494, "ts": 1742522672427904, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672427923, "dur": 1, "args": {"External id": 10689, "cbid": 147, "correlation": 10689}}, {"ph": "s", "id": 10689, "pid": 494, "tid": 494, "ts": 1742522672427923, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::CatArrayBatchedCopy<c10::BFloat16, unsigned int, 2, 128, 1>(c10::BFloat16*, at::native::(anonymous namespace)::CatArrInputTensorMetadata<c10::BFloat16, unsigned int, 128, 1>, at::native::(anonymous namespace)::TensorSizeStride<unsigned int, 4u>, int, unsigned int)", "pid": 0, "tid": 7, "ts": 1742522672509749, "dur": 5, "args": {"External id": 10698, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10698, "registers per thread": 21, "shared memory": 0, "blocks per SM": 4, "warps per SM": 64, "grid": [264, 2, 1], "block": [512, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 10698, "pid": 0, "tid": 7, "ts": 1742522672509749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672427937, "dur": 5, "args": {"External id": 10698, "cbid": 211, "correlation": 10698}}, {"ph": "s", "id": 10698, "pid": 494, "tid": 494, "ts": 1742522672427937, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3> >(int, at::native::CUDAFunctor_add<c10::BFloat16>, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672509756, "dur": 4, "args": {"External id": 10708, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10708, "registers per thread": 24, "shared memory": 0, "blocks per SM": 27.151516, "warps per SM": 108.606064, "grid": [3584, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 10708, "pid": 0, "tid": 7, "ts": 1742522672509756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672427967, "dur": 4, "args": {"External id": 10708, "cbid": 211, "correlation": 10708}}, {"ph": "s", "id": 10708, "pid": 494, "tid": 494, "ts": 1742522672427967, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "_layer_norm_kernel", "pid": 0, "tid": 7, "ts": 1742522672509762, "dur": 5, "args": {"External id": 10737, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10737, "registers per thread": 84, "shared memory": 16, "blocks per SM": 1.939394, "warps per SM": 7.757576, "grid": [256, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 10737, "pid": 0, "tid": 7, "ts": 1742522672509762, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_driver", "name": "cuLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428075, "dur": 5, "args": {"External id": 10737, "cbid": 307, "correlation": 10737}}, {"ph": "s", "id": 10737, "pid": 494, "tid": 494, "ts": 1742522672428075, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::index_elementwise_kernel<128, 4, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1}>(long, at::native::gpu_index_kernel<at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1}>(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>, at::native::index_kernel_impl<at::native::OpaqueType<2> >(at::TensorIteratorBase&, c10::ArrayRef<long>, c10::ArrayRef<long>)::{lambda(char*, char const*, long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672509769, "dur": 7, "args": {"External id": 10746, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10746, "registers per thread": 40, "shared memory": 0, "blocks per SM": 13.575758, "warps per SM": 54.303032, "grid": [1792, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 75}}, {"ph": "f", "id": 10746, "pid": 0, "tid": 7, "ts": 1742522672509769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428117, "dur": 5, "args": {"External id": 10746, "cbid": 211, "correlation": 10746}}, {"ph": "s", "id": 10746, "pid": 494, "tid": 494, "ts": 1742522672428117, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncGetAttributes", "pid": 494, "tid": 494, "ts": 1742522672428152, "dur": 3, "args": {"External id": 10761, "cbid": 15, "correlation": 10761}}, {"ph": "f", "id": 10761, "pid": 494, "tid": 494, "ts": 1742522672428152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memset", "name": "Memset (Device)", "pid": 0, "tid": 7, "ts": 1742522672509778, "dur": 0, "args": {"External id": 10762, "device": 0, "context": 1, "stream": 7, "correlation": 10762, "bytes": 4, "memory bandwidth (GB/s)": 0.005681818181818182}}, {"ph": "f", "id": 10762, "pid": 0, "tid": 7, "ts": 1742522672509778, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemsetAsync", "pid": 494, "tid": 494, "ts": 1742522672428162, "dur": 6, "args": {"External id": 10762, "cbid": 51, "correlation": 10762}}, {"ph": "s", "id": 10762, "pid": 494, "tid": 494, "ts": 1742522672428162, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memset", "name": "Memset (Device)", "pid": 0, "tid": 7, "ts": 1742522672509780, "dur": 1, "args": {"External id": 10763, "device": 0, "context": 1, "stream": 7, "correlation": 10763, "bytes": 1792, "memory bandwidth (GB/s)": 1.6}}, {"ph": "f", "id": 10763, "pid": 0, "tid": 7, "ts": 1742522672509780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemsetAsync", "pid": 494, "tid": 494, "ts": 1742522672428168, "dur": 2, "args": {"External id": 10763, "cbid": 51, "correlation": 10763}}, {"ph": "s", "id": 10763, "pid": 494, "tid": 494, "ts": 1742522672428168, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672428172, "dur": 0, "args": {"External id": 10764, "cbid": 273, "correlation": 10764}}, {"ph": "f", "id": 10764, "pid": 494, "tid": 494, "ts": 1742522672428172, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672428173, "dur": 0, "args": {"External id": 10765, "cbid": 273, "correlation": 10765}}, {"ph": "f", "id": 10765, "pid": 494, "tid": 494, "ts": 1742522672428173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672428173, "dur": 0, "args": {"External id": 10766, "cbid": 273, "correlation": 10766}}, {"ph": "f", "id": 10766, "pid": 494, "tid": 494, "ts": 1742522672428173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "sm90_xmma_gemm_bf16f32_bf16f32_f32_tn_n_tilesize128x128x64_warpgroupsize1x1x1_execute_segment_k_on_kernel__5x_cublas", "pid": 0, "tid": 7, "ts": 1742522672509783, "dur": 669, "args": {"External id": 10767, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10767, "registers per thread": 168, "shared memory": 231424, "blocks per SM": 0.90909094, "warps per SM": 10.909091, "grid": [120, 1, 1], "block": [384, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10767, "pid": 0, "tid": 7, "ts": 1742522672509783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernelExC", "pid": 494, "tid": 494, "ts": 1742522672428174, "dur": 5, "args": {"External id": 10767, "cbid": 430, "correlation": 10767}}, {"ph": "s", "id": 10767, "pid": 494, "tid": 494, "ts": 1742522672428174, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void vllm::apply_penalty_kernel<float, true, 256>(float*, float const*, float const*, float const*, long const*, int**, int, int, int)", "pid": 0, "tid": 7, "ts": 1742522672510454, "dur": 64, "args": {"External id": 10774, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10774, "registers per thread": 168, "shared memory": 0, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 10774, "pid": 0, "tid": 7, "ts": 1742522672510454, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428343, "dur": 4, "args": {"External id": 10774, "cbid": 211, "correlation": 10774}}, {"ph": "s", "id": 10774, "pid": 494, "tid": 494, "ts": 1742522672428343, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaFuncSetAttribute", "pid": 494, "tid": 494, "ts": 1742522672428363, "dur": 1, "args": {"External id": 10793, "cbid": 273, "correlation": 10793}}, {"ph": "f", "id": 10793, "pid": 494, "tid": 494, "ts": 1742522672428363, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void vllm::topk_kernel<float, unsigned int, 2>(at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, unsigned int, bool, bool, unsigned int, at::cuda::detail::TensorInfo<float, unsigned int>, at::cuda::detail::TensorInfo<long, unsigned int>)", "pid": 0, "tid": 7, "ts": 1742522672510521, "dur": 163, "args": {"External id": 10794, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10794, "registers per thread": 64, "shared memory": 113216, "blocks per SM": 0.969697, "warps per SM": 31.030304, "grid": [128, 1, 1], "block": [1024, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10794, "pid": 0, "tid": 7, "ts": 1742522672510521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428364, "dur": 3, "args": {"External id": 10794, "cbid": 211, "correlation": 10794}}, {"ph": "s", "id": 10794, "pid": 494, "tid": 494, "ts": 1742522672428364, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672510686, "dur": 3, "args": {"External id": 10805, "device": 0, "context": 1, "stream": 7, "correlation": 10805, "bytes": 2097152, "memory bandwidth (GB/s)": 697.1914893617021}}, {"ph": "f", "id": 10805, "pid": 0, "tid": 7, "ts": 1742522672510686, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428384, "dur": 8, "args": {"External id": 10805, "cbid": 41, "correlation": 10805}}, {"ph": "s", "id": 10805, "pid": 494, "tid": 494, "ts": 1742522672428384, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672510690, "dur": 1, "args": {"External id": 10823, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10823, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 0.969697, "grid": [64, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 10823, "pid": 0, "tid": 7, "ts": 1742522672510690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428406, "dur": 3, "args": {"External id": 10823, "cbid": 211, "correlation": 10823}}, {"ph": "s", "id": 10823, "pid": 494, "tid": 494, "ts": 1742522672428406, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::elementwise_kernel<128, 2, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1} const&)::{lambda(int)#1}>(int, at::native::gpu_kernel_impl<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(at::TensorIteratorBase&, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1} const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672510693, "dur": 3, "args": {"External id": 10829, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10829, "registers per thread": 18, "shared memory": 0, "blocks per SM": 15.515152, "warps per SM": 62.060608, "grid": [2048, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 97}}, {"ph": "f", "id": 10829, "pid": 0, "tid": 7, "ts": 1742522672510693, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428417, "dur": 3, "args": {"External id": 10829, "cbid": 211, "correlation": 10829}}, {"ph": "s", "id": 10829, "pid": 494, "tid": 494, "ts": 1742522672428417, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672510698, "dur": 2, "args": {"External id": 10835, "device": 0, "context": 1, "stream": 7, "correlation": 10835, "bytes": 2097152, "memory bandwidth (GB/s)": 885.6216216216217}}, {"ph": "f", "id": 10835, "pid": 0, "tid": 7, "ts": 1742522672510698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428424, "dur": 4, "args": {"External id": 10835, "cbid": 41, "correlation": 10835}}, {"ph": "s", "id": 10835, "pid": 494, "tid": 494, "ts": 1742522672428424, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::radixSortKVInPlace<2, -1, 128, 32, float, long, unsigned int>(at::cuda::detail::TensorInfo<float, unsigned int>, unsigned int, unsigned int, unsigned int, at::cuda::detail::TensorInfo<long, unsigned int>, unsigned int, bool)", "pid": 0, "tid": 7, "ts": 1742522672510701, "dur": 24, "args": {"External id": 10840, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10840, "registers per thread": 217, "shared memory": 33808, "blocks per SM": 0.969697, "warps per SM": 3.878788, "grid": [128, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 6}}, {"ph": "f", "id": 10840, "pid": 0, "tid": 7, "ts": 1742522672510701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428433, "dur": 3, "args": {"External id": 10840, "cbid": 211, "correlation": 10840}}, {"ph": "s", "id": 10840, "pid": 494, "tid": 494, "ts": 1742522672428433, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void vllm::mask_top_p_kernel<4, float, float, 256>(float*, float const*, int, int)", "pid": 0, "tid": 7, "ts": 1742522672510727, "dur": 8, "args": {"External id": 10846, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10846, "registers per thread": 32, "shared memory": 1216, "blocks per SM": 0.969697, "warps per SM": 7.757576, "grid": [128, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 12}}, {"ph": "f", "id": 10846, "pid": 0, "tid": 7, "ts": 1742522672510727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428444, "dur": 3, "args": {"External id": 10846, "cbid": 211, "correlation": 10846}}, {"ph": "s", "id": 10846, "pid": 494, "tid": 494, "ts": 1742522672428444, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void (anonymous namespace)::elementwise_kernel_with_index<int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>(int, at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, function_traits<at::native::arange_cuda_out(c10::Scalar const&, c10::Scalar const&, c10::Scalar const&, at::Tensor&)::{lambda()#1}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}>::result_type*)", "pid": 0, "tid": 7, "ts": 1742522672510737, "dur": 1, "args": {"External id": 10864, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10864, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.4848485, "warps per SM": 0.969697, "grid": [64, 1, 1], "block": [64, 1, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 10864, "pid": 0, "tid": 7, "ts": 1742522672510737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428459, "dur": 3, "args": {"External id": 10864, "cbid": 211, "correlation": 10864}}, {"ph": "s", "id": 10864, "pid": 494, "tid": 494, "ts": 1742522672428459, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<true, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<true, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672510740, "dur": 10, "args": {"External id": 10877, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10877, "registers per thread": 32, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 10877, "pid": 0, "tid": 7, "ts": 1742522672510740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428485, "dur": 4, "args": {"External id": 10877, "cbid": 211, "correlation": 10877}}, {"ph": "s", "id": 10877, "pid": 494, "tid": 494, "ts": 1742522672428485, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<4> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672510752, "dur": 5, "args": {"External id": 10887, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10887, "registers per thread": 32, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 10887, "pid": 0, "tid": 7, "ts": 1742522672510752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428500, "dur": 3, "args": {"External id": 10887, "cbid": 211, "correlation": 10887}}, {"ph": "s", "id": 10887, "pid": 494, "tid": 494, "ts": 1742522672428500, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::cunn_SoftMaxForward<4, float, float, float, at::native::(anonymous namespace)::SoftMaxForwardEpilogue>(float*, float const*, int)", "pid": 0, "tid": 7, "ts": 1742522672510759, "dur": 5, "args": {"External id": 10898, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10898, "registers per thread": 30, "shared memory": 2048, "blocks per SM": 0.969697, "warps per SM": 15.515152, "grid": [128, 1, 1], "block": [512, 1, 1], "est. achieved occupancy %": 24}}, {"ph": "f", "id": 10898, "pid": 0, "tid": 7, "ts": 1742522672510759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428516, "dur": 3, "args": {"External id": 10898, "cbid": 211, "correlation": 10898}}, {"ph": "s", "id": 10898, "pid": 494, "tid": 494, "ts": 1742522672428516, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::log_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2> >(int, at::native::log_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>)", "pid": 0, "tid": 7, "ts": 1742522672510766, "dur": 2, "args": {"External id": 10908, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10908, "registers per thread": 22, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 10908, "pid": 0, "tid": 7, "ts": 1742522672510766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428527, "dur": 3, "args": {"External id": 10908, "cbid": 211, "correlation": 10908}}, {"ph": "s", "id": 10908, "pid": 494, "tid": 494, "ts": 1742522672428527, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672510770, "dur": 2, "args": {"External id": 10914, "device": 0, "context": 1, "stream": 7, "correlation": 10914, "bytes": 2097152, "memory bandwidth (GB/s)": 771.0117647058823}}, {"ph": "f", "id": 10914, "pid": 0, "tid": 7, "ts": 1742522672510770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428543, "dur": 6, "args": {"External id": 10914, "cbid": 41, "correlation": 10914}}, {"ph": "s", "id": 10914, "pid": 494, "tid": 494, "ts": 1742522672428543, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamIsCapturing", "pid": 494, "tid": 494, "ts": 1742522672428563, "dur": 1, "args": {"External id": 10934, "cbid": 317, "correlation": 10934}}, {"ph": "f", "id": 10934, "pid": 494, "tid": 494, "ts": 1742522672428563, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "void at::native::(anonymous namespace)::distribution_elementwise_grid_stride_kernel<float, 4, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1}>(int, at::PhiloxCudaState, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::(anonymous namespace)::distribution_nullary_kernel<float, float, 4, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2}, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::uniform_and_transform<float, float, 4ul, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1}>(at::TensorIteratorBase&, at::CUDAGeneratorImpl*, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(curandStatePhilox4_32_10*)#2} const&, at::native::templates::cuda::exponential_kernel<at::CUDAGeneratorImpl*>(at::TensorIteratorBase&, double, at::CUDAGeneratorImpl*)::{lambda()#1}::operator()() const::{lambda()#2}::operator()() const::{lambda(float)#1})::{lambda(int, float)#1})", "pid": 0, "tid": 7, "ts": 1742522672510774, "dur": 4, "args": {"External id": 10936, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10936, "registers per thread": 52, "shared memory": 0, "blocks per SM": 8, "warps per SM": 64, "grid": [1056, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 50}}, {"ph": "f", "id": 10936, "pid": 0, "tid": 7, "ts": 1742522672510774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428565, "dur": 4, "args": {"External id": 10936, "cbid": 211, "correlation": 10936}}, {"ph": "s", "id": 10936, "pid": 494, "tid": 494, "ts": 1742522672428565, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::BinaryFunctor<float, float, float, at::native::binary_internal::DivFunctor<float> >, at::detail::Array<char*, 3> >(int, at::native::BinaryFunctor<float, float, float, at::native::binary_internal::DivFunctor<float> >, at::detail::Array<char*, 3>)", "pid": 0, "tid": 7, "ts": 1742522672510780, "dur": 3, "args": {"External id": 10942, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10942, "registers per thread": 26, "shared memory": 0, "blocks per SM": 7.757576, "warps per SM": 31.030304, "grid": [1024, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 48}}, {"ph": "f", "id": 10942, "pid": 0, "tid": 7, "ts": 1742522672510780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428576, "dur": 3, "args": {"External id": 10942, "cbid": 211, "correlation": 10942}}, {"ph": "s", "id": 10942, "pid": 494, "tid": 494, "ts": 1742522672428576, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<float, at::native::ArgMaxOps<float>, unsigned int, long, 4> >(at::native::ReduceOp<float, at::native::ArgMaxOps<float>, unsigned int, long, 4>)", "pid": 0, "tid": 7, "ts": 1742522672510785, "dur": 16, "args": {"External id": 10956, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10956, "registers per thread": 32, "shared memory": 16, "blocks per SM": 0.060606062, "warps per SM": 0.969697, "grid": [8, 1, 1], "block": [32, 16, 1], "est. achieved occupancy %": 2}}, {"ph": "f", "id": 10956, "pid": 0, "tid": 7, "ts": 1742522672510785, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428592, "dur": 3, "args": {"External id": 10956, "cbid": 211, "correlation": 10956}}, {"ph": "s", "id": 10956, "pid": 494, "tid": 494, "ts": 1742522672428592, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::_scatter_gather_elementwise_kernel<128, 4, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1}>(int, at::native::_cuda_scatter_gather_internal_kernel<false, at::native::OpaqueType<8> >::operator()<at::native::TensorAssign>(at::TensorIterator&, long, long, long, at::native::TensorAssign const&)::{lambda(int)#1})", "pid": 0, "tid": 7, "ts": 1742522672510802, "dur": 2, "args": {"External id": 10969, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10969, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10969, "pid": 0, "tid": 7, "ts": 1742522672510802, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428610, "dur": 3, "args": {"External id": 10969, "cbid": 211, "correlation": 10969}}, {"ph": "s", "id": 10969, "pid": 494, "tid": 494, "ts": 1742522672428610, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672510806, "dur": 1, "args": {"External id": 10975, "device": 0, "context": 1, "stream": 7, "correlation": 10975, "bytes": 1024, "memory bandwidth (GB/s)": 0.7272727272727273}}, {"ph": "f", "id": 10975, "pid": 0, "tid": 7, "ts": 1742522672510806, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428626, "dur": 5, "args": {"External id": 10975, "cbid": 41, "correlation": 10975}}, {"ph": "s", "id": 10975, "pid": 494, "tid": 494, "ts": 1742522672428626, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672510808, "dur": 3, "args": {"External id": 10983, "device": 0, "context": 1, "stream": 7, "correlation": 10983, "bytes": 4194304, "memory bandwidth (GB/s)": 1074.360655737705}}, {"ph": "f", "id": 10983, "pid": 0, "tid": 7, "ts": 1742522672510808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428640, "dur": 4, "args": {"External id": 10983, "cbid": 41, "correlation": 10983}}, {"ph": "s", "id": 10983, "pid": 494, "tid": 494, "ts": 1742522672428640, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "vllm::revert_output_bin_count_kernel(long const*, int**, int)", "pid": 0, "tid": 7, "ts": 1742522672510813, "dur": 2, "args": {"External id": 10991, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 10991, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.060606062, "grid": [1, 1, 1], "block": [256, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 10991, "pid": 0, "tid": 7, "ts": 1742522672510813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428650, "dur": 4, "args": {"External id": 10991, "cbid": 211, "correlation": 10991}}, {"ph": "s", "id": 10991, "pid": 494, "tid": 494, "ts": 1742522672428650, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672510817, "dur": 1, "args": {"External id": 11001, "device": 0, "context": 1, "stream": 7, "correlation": 11001, "bytes": 5120, "memory bandwidth (GB/s)": 2.962962962962963}}, {"ph": "f", "id": 11001, "pid": 0, "tid": 7, "ts": 1742522672510817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428669, "dur": 4, "args": {"External id": 11001, "cbid": 41, "correlation": 11001}}, {"ph": "s", "id": 11001, "pid": 494, "tid": 494, "ts": 1742522672428669, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672510820, "dur": 1, "args": {"External id": 11013, "device": 0, "context": 1, "stream": 7, "correlation": 11013, "bytes": 2560, "memory bandwidth (GB/s)": 1.5384615384615385}}, {"ph": "f", "id": 11013, "pid": 0, "tid": 7, "ts": 1742522672510820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428688, "dur": 3, "args": {"External id": 11013, "cbid": 41, "correlation": 11013}}, {"ph": "s", "id": 11013, "pid": 494, "tid": 494, "ts": 1742522672428688, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672428702, "dur": 2, "args": {"External id": 11019, "cbid": 138, "correlation": 11019}}, {"ph": "f", "id": 11019, "pid": 494, "tid": 494, "ts": 1742522672428702, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672510828, "dur": 2, "args": {"External id": 11023, "device": 0, "context": 1, "stream": 7, "correlation": 11023, "bytes": 1024, "memory bandwidth (GB/s)": 0.43243243243243246}}, {"ph": "f", "id": 11023, "pid": 0, "tid": 7, "ts": 1742522672510828, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428708, "dur": 6, "args": {"External id": 11023, "cbid": 41, "correlation": 11023}}, {"ph": "s", "id": 11023, "pid": 494, "tid": 494, "ts": 1742522672428708, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672428720, "dur": 0, "args": {"External id": 11026, "cbid": 138, "correlation": 11026}}, {"ph": "f", "id": 11026, "pid": 494, "tid": 494, "ts": 1742522672428720, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672510832, "dur": 2, "args": {"External id": 11030, "device": 0, "context": 1, "stream": 7, "correlation": 11030, "bytes": 512, "memory bandwidth (GB/s)": 0.21333333333333335}}, {"ph": "f", "id": 11030, "pid": 0, "tid": 7, "ts": 1742522672510832, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428723, "dur": 3, "args": {"External id": 11030, "cbid": 41, "correlation": 11030}}, {"ph": "s", "id": 11030, "pid": 494, "tid": 494, "ts": 1742522672428723, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672428731, "dur": 0, "args": {"External id": 11033, "cbid": 138, "correlation": 11033}}, {"ph": "f", "id": 11033, "pid": 494, "tid": 494, "ts": 1742522672428731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672510836, "dur": 2, "args": {"External id": 11037, "device": 0, "context": 1, "stream": 7, "correlation": 11037, "bytes": 512, "memory bandwidth (GB/s)": 0.2191780821917808}}, {"ph": "f", "id": 11037, "pid": 0, "tid": 7, "ts": 1742522672510836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428734, "dur": 3, "args": {"External id": 11037, "cbid": 41, "correlation": 11037}}, {"ph": "s", "id": 11037, "pid": 494, "tid": 494, "ts": 1742522672428734, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672428742, "dur": 0, "args": {"External id": 11040, "cbid": 138, "correlation": 11040}}, {"ph": "f", "id": 11040, "pid": 494, "tid": 494, "ts": 1742522672428742, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672510841, "dur": 2, "args": {"External id": 11044, "device": 0, "context": 1, "stream": 7, "correlation": 11044, "bytes": 1024, "memory bandwidth (GB/s)": 0.4383561643835616}}, {"ph": "f", "id": 11044, "pid": 0, "tid": 7, "ts": 1742522672510841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428745, "dur": 2, "args": {"External id": 11044, "cbid": 41, "correlation": 11044}}, {"ph": "s", "id": 11044, "pid": 494, "tid": 494, "ts": 1742522672428745, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672428752, "dur": 0, "args": {"External id": 11047, "cbid": 138, "correlation": 11047}}, {"ph": "f", "id": 11047, "pid": 494, "tid": 494, "ts": 1742522672428752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672510845, "dur": 2, "args": {"External id": 11051, "device": 0, "context": 1, "stream": 7, "correlation": 11051, "bytes": 1024, "memory bandwidth (GB/s)": 0.463768115942029}}, {"ph": "f", "id": 11051, "pid": 0, "tid": 7, "ts": 1742522672510845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428755, "dur": 2, "args": {"External id": 11051, "cbid": 41, "correlation": 11051}}, {"ph": "s", "id": 11051, "pid": 494, "tid": 494, "ts": 1742522672428755, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<long, at::native::func_wrapper_t<long, at::native::sum_functor<long, long, long>::operator()(at::TensorIterator&)::{lambda(long, long)#1}>, unsigned int, long, 4> >(at::native::ReduceOp<long, at::native::func_wrapper_t<long, at::native::sum_functor<long, long, long>::operator()(at::TensorIterator&)::{lambda(long, long)#1}>, unsigned int, long, 4>)", "pid": 0, "tid": 7, "ts": 1742522672510849, "dur": 3, "args": {"External id": 11064, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 11064, "registers per thread": 32, "shared memory": 1040, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 11064, "pid": 0, "tid": 7, "ts": 1742522672510849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428770, "dur": 5, "args": {"External id": 11064, "cbid": 211, "correlation": 11064}}, {"ph": "s", "id": 11064, "pid": 494, "tid": 494, "ts": 1742522672428770, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672428779, "dur": 0, "args": {"External id": 11068, "cbid": 138, "correlation": 11068}}, {"ph": "f", "id": 11068, "pid": 494, "tid": 494, "ts": 1742522672428779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672510860, "dur": 2, "args": {"External id": 11072, "device": 0, "context": 1, "stream": 7, "correlation": 11072, "bytes": 8, "memory bandwidth (GB/s)": 0.0036231884057971015}}, {"ph": "f", "id": 11072, "pid": 0, "tid": 7, "ts": 1742522672510860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428782, "dur": 3, "args": {"External id": 11072, "cbid": 41, "correlation": 11072}}, {"ph": "s", "id": 11072, "pid": 494, "tid": 494, "ts": 1742522672428782, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672428790, "dur": 0, "args": {"External id": 11075, "cbid": 138, "correlation": 11075}}, {"ph": "f", "id": 11075, "pid": 494, "tid": 494, "ts": 1742522672428790, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672510865, "dur": 2, "args": {"External id": 11079, "device": 0, "context": 1, "stream": 7, "correlation": 11079, "bytes": 2560, "memory bandwidth (GB/s)": 1.1428571428571428}}, {"ph": "f", "id": 11079, "pid": 0, "tid": 7, "ts": 1742522672510865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428793, "dur": 3, "args": {"External id": 11079, "cbid": 41, "correlation": 11079}}, {"ph": "s", "id": 11079, "pid": 494, "tid": 494, "ts": 1742522672428793, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672428800, "dur": 0, "args": {"External id": 11082, "cbid": 138, "correlation": 11082}}, {"ph": "f", "id": 11082, "pid": 494, "tid": 494, "ts": 1742522672428800, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672510869, "dur": 2, "args": {"External id": 11086, "device": 0, "context": 1, "stream": 7, "correlation": 11086, "bytes": 5120, "memory bandwidth (GB/s)": 2.191780821917808}}, {"ph": "f", "id": 11086, "pid": 0, "tid": 7, "ts": 1742522672510869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428803, "dur": 2, "args": {"External id": 11086, "cbid": 41, "correlation": 11086}}, {"ph": "s", "id": 11086, "pid": 494, "tid": 494, "ts": 1742522672428803, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672428809, "dur": 0, "args": {"External id": 11089, "cbid": 138, "correlation": 11089}}, {"ph": "f", "id": 11089, "pid": 494, "tid": 494, "ts": 1742522672428809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672510873, "dur": 2, "args": {"External id": 11093, "device": 0, "context": 1, "stream": 7, "correlation": 11093, "bytes": 2560, "memory bandwidth (GB/s)": 1.0526315789473684}}, {"ph": "f", "id": 11093, "pid": 0, "tid": 7, "ts": 1742522672510873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428812, "dur": 2, "args": {"External id": 11093, "cbid": 41, "correlation": 11093}}, {"ph": "s", "id": 11093, "pid": 494, "tid": 494, "ts": 1742522672428812, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1> >(int, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672510878, "dur": 1, "args": {"External id": 11105, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 11105, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.015151516, "warps per SM": 0.060606062, "grid": [2, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 11105, "pid": 0, "tid": 7, "ts": 1742522672510878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672428826, "dur": 4, "args": {"External id": 11105, "cbid": 211, "correlation": 11105}}, {"ph": "s", "id": 11105, "pid": 494, "tid": 494, "ts": 1742522672428826, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672428834, "dur": 0, "args": {"External id": 11110, "cbid": 138, "correlation": 11110}}, {"ph": "f", "id": 11110, "pid": 494, "tid": 494, "ts": 1742522672428834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672510885, "dur": 2, "args": {"External id": 11114, "device": 0, "context": 1, "stream": 7, "correlation": 11114, "bytes": 2560, "memory bandwidth (GB/s)": 1.1267605633802817}}, {"ph": "f", "id": 11114, "pid": 0, "tid": 7, "ts": 1742522672510885, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672428837, "dur": 3, "args": {"External id": 11114, "cbid": 41, "correlation": 11114}}, {"ph": "s", "id": 11114, "pid": 494, "tid": 494, "ts": 1742522672428837, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672510889, "dur": 0, "args": {"External id": 12027, "device": 0, "context": 1, "stream": 7, "correlation": 12027, "bytes": 1024, "memory bandwidth (GB/s)": 1.032258064516129}}, {"ph": "f", "id": 12027, "pid": 0, "tid": 7, "ts": 1742522672510889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672429288, "dur": 6, "args": {"External id": 12027, "cbid": 41, "correlation": 12027}}, {"ph": "s", "id": 12027, "pid": 494, "tid": 494, "ts": 1742522672429288, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void unbind_impl<float, 1024>(float const*, long*, int, int)", "pid": 0, "tid": 7, "ts": 1742522672510892, "dur": 2, "args": {"External id": 12031, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 12031, "registers per thread": 16, "shared memory": 0, "blocks per SM": 3.878788, "warps per SM": 124.121216, "grid": [128, 4, 1], "block": [1024, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 12031, "pid": 0, "tid": 7, "ts": 1742522672510892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672429298, "dur": 6, "args": {"External id": 12031, "cbid": 211, "correlation": 12031}}, {"ph": "s", "id": 12031, "pid": 494, "tid": 494, "ts": 1742522672429298, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy HtoD (Pageable -> Device)", "pid": 0, "tid": 7, "ts": 1742522672510896, "dur": 1, "args": {"External id": 12945, "device": 0, "context": 1, "stream": 7, "correlation": 12945, "bytes": 1024, "memory bandwidth (GB/s)": 0.9696969696969697}}, {"ph": "f", "id": 12945, "pid": 0, "tid": 7, "ts": 1742522672510896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672429752, "dur": 4, "args": {"External id": 12945, "cbid": 41, "correlation": 12945}}, {"ph": "s", "id": 12945, "pid": 494, "tid": 494, "ts": 1742522672429752, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void unbind_impl<long, 1024>(long const*, long*, int, int)", "pid": 0, "tid": 7, "ts": 1742522672510899, "dur": 3, "args": {"External id": 12949, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 12949, "registers per thread": 16, "shared memory": 0, "blocks per SM": 3.878788, "warps per SM": 124.121216, "grid": [128, 4, 1], "block": [1024, 1, 1], "est. achieved occupancy %": 100}}, {"ph": "f", "id": 12949, "pid": 0, "tid": 7, "ts": 1742522672510899, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672429760, "dur": 5, "args": {"External id": 12949, "cbid": 211, "correlation": 12949}}, {"ph": "s", "id": 12949, "pid": 494, "tid": 494, "ts": 1742522672429760, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672429780, "dur": 1, "args": {"External id": 12953, "cbid": 138, "correlation": 12953}}, {"ph": "f", "id": 12953, "pid": 494, "tid": 494, "ts": 1742522672429780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672510910, "dur": 2, "args": {"External id": 12957, "device": 0, "context": 1, "stream": 7, "correlation": 12957, "bytes": 8, "memory bandwidth (GB/s)": 0.003472222222222222}}, {"ph": "f", "id": 12957, "pid": 0, "tid": 7, "ts": 1742522672510910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672429784, "dur": 4, "args": {"External id": 12957, "cbid": 41, "correlation": 12957}}, {"ph": "s", "id": 12957, "pid": 494, "tid": 494, "ts": 1742522672429784, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1> >(int, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#4}::operator()() const::{lambda(long)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672510914, "dur": 2, "args": {"External id": 12974, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 12974, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 12974, "pid": 0, "tid": 7, "ts": 1742522672510914, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672429818, "dur": 5, "args": {"External id": 12974, "cbid": 211, "correlation": 12974}}, {"ph": "s", "id": 12974, "pid": 494, "tid": 494, "ts": 1742522672429818, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<256, 2, at::native::ReduceOp<long, at::native::func_wrapper_t<long, at::native::sum_functor<long, long, long>::operator()(at::TensorIterator&)::{lambda(long, long)#1}>, unsigned int, long, 4> >(at::native::ReduceOp<long, at::native::func_wrapper_t<long, at::native::sum_functor<long, long, long>::operator()(at::TensorIterator&)::{lambda(long, long)#1}>, unsigned int, long, 4>)", "pid": 0, "tid": 7, "ts": 1742522672510918, "dur": 3, "args": {"External id": 12983, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 12983, "registers per thread": 56, "shared memory": 16, "blocks per SM": 0.007575758, "warps per SM": 0.015151516, "grid": [1, 1, 1], "block": [32, 2, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 12983, "pid": 0, "tid": 7, "ts": 1742522672510918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672429831, "dur": 4, "args": {"External id": 12983, "cbid": 211, "correlation": 12983}}, {"ph": "s", "id": 12983, "pid": 494, "tid": 494, "ts": 1742522672429831, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1> >(int, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#3}::operator()() const::{lambda(int)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672510922, "dur": 1, "args": {"External id": 12990, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 12990, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.007575758, "warps per SM": 0.030303031, "grid": [1, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 12990, "pid": 0, "tid": 7, "ts": 1742522672510922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672429863, "dur": 3, "args": {"External id": 12990, "cbid": 211, "correlation": 12990}}, {"ph": "s", "id": 12990, "pid": 494, "tid": 494, "ts": 1742522672429863, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<long, at::native::func_wrapper_t<long, at::native::sum_functor<long, long, long>::operator()(at::TensorIterator&)::{lambda(long, long)#1}>, unsigned int, long, 4> >(at::native::ReduceOp<long, at::native::func_wrapper_t<long, at::native::sum_functor<long, long, long>::operator()(at::TensorIterator&)::{lambda(long, long)#1}>, unsigned int, long, 4>)", "pid": 0, "tid": 7, "ts": 1742522672510925, "dur": 3, "args": {"External id": 13007, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 13007, "registers per thread": 32, "shared memory": 16, "blocks per SM": 0.015151516, "warps per SM": 0.030303031, "grid": [2, 1, 1], "block": [2, 32, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 13007, "pid": 0, "tid": 7, "ts": 1742522672510925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672429882, "dur": 3, "args": {"External id": 13007, "cbid": 211, "correlation": 13007}}, {"ph": "s", "id": 13007, "pid": 494, "tid": 494, "ts": 1742522672429882, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::vectorized_elementwise_kernel<4, at::native::FillFunctor<long>, at::detail::Array<char*, 1> >(int, at::native::FillFunctor<long>, at::detail::Array<char*, 1>)", "pid": 0, "tid": 7, "ts": 1742522672510930, "dur": 1, "args": {"External id": 13021, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 13021, "registers per thread": 16, "shared memory": 0, "blocks per SM": 0.11363637, "warps per SM": 0.45454547, "grid": [15, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 1}}, {"ph": "f", "id": 13021, "pid": 0, "tid": 7, "ts": 1742522672510930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672429903, "dur": 3, "args": {"External id": 13021, "cbid": 211, "correlation": 13021}}, {"ph": "s", "id": 13021, "pid": 494, "tid": 494, "ts": 1742522672429903, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoD (Device -> Device)", "pid": 0, "tid": 7, "ts": 1742522672510933, "dur": 1, "args": {"External id": 13032, "device": 0, "context": 1, "stream": 7, "correlation": 13032, "bytes": 472, "memory bandwidth (GB/s)": 0.3277777777777778}}, {"ph": "f", "id": 13032, "pid": 0, "tid": 7, "ts": 1742522672510933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672429916, "dur": 7, "args": {"External id": 13032, "cbid": 41, "correlation": 13032}}, {"ph": "s", "id": 13032, "pid": 494, "tid": 494, "ts": 1742522672429916, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamIsCapturing", "pid": 494, "tid": 494, "ts": 1742522672429951, "dur": 1, "args": {"External id": 13039, "cbid": 317, "correlation": 13039}}, {"ph": "f", "id": 13039, "pid": 494, "tid": 494, "ts": 1742522672429951, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672429961, "dur": 1, "args": {"External id": 13045, "cbid": 147, "correlation": 13045}}, {"ph": "s", "id": 13045, "pid": 494, "tid": 494, "ts": 1742522672429961, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamGetCaptureInfo_v2", "pid": 494, "tid": 494, "ts": 1742522672429975, "dur": 1, "args": {"External id": 13049, "cbid": 409, "correlation": 13049}}, {"ph": "f", "id": 13049, "pid": 494, "tid": 494, "ts": 1742522672429975, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672429989, "dur": 1, "args": {"External id": 13054, "cbid": 147, "correlation": 13054}}, {"ph": "s", "id": 13054, "pid": 494, "tid": 494, "ts": 1742522672429989, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaGetFuncBySymbol", "pid": 494, "tid": 494, "ts": 1742522672429991, "dur": 0, "args": {"External id": 13056, "cbid": 336, "correlation": 13056}}, {"ph": "f", "id": 13056, "pid": 494, "tid": 494, "ts": 1742522672429991, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "kernel", "name": "ncclDevKernel_AllReduce_Sum_u64_TREE_LL(ncclDevKernelArgsStorage<4096ul>)", "pid": 0, "tid": 13, "ts": 1742522672510936, "dur": 155, "args": {"External id": 13057, "queued": 0, "device": 0, "context": 1, "stream": 13, "correlation": 13057, "registers per thread": 96, "shared memory": 103776, "blocks per SM": 0.060606062, "warps per SM": 1.2121212, "grid": [8, 1, 1], "block": [640, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 13057, "pid": 0, "tid": 13, "ts": 1742522672510936, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672430033, "dur": 0, "args": {"External id": 13060, "cbid": 147, "correlation": 13060}}, {"ph": "s", "id": 13060, "pid": 494, "tid": 494, "ts": 1742522672430033, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672430051, "dur": 4, "args": {"External id": 13079, "cbid": 147, "correlation": 13079}}, {"ph": "s", "id": 13079, "pid": 494, "tid": 494, "ts": 1742522672430051, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamWaitEvent", "pid": 494, "tid": 494, "ts": 1742522672430074, "dur": 1, "args": {"External id": 13086, "cbid": 147, "correlation": 13086}}, {"ph": "s", "id": 13086, "pid": 494, "tid": 494, "ts": 1742522672430074, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1> >(int, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672511092, "dur": 2, "args": {"External id": 13102, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 13102, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.11363637, "warps per SM": 0.45454547, "grid": [15, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 1}}, {"ph": "f", "id": 13102, "pid": 0, "tid": 7, "ts": 1742522672511092, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672430101, "dur": 9, "args": {"External id": 13102, "cbid": 211, "correlation": 13102}}, {"ph": "s", "id": 13102, "pid": 494, "tid": 494, "ts": 1742522672430101, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<float, at::native::MeanOps<float, float, float, float>, unsigned int, float, 4> >(at::native::ReduceOp<float, at::native::MeanOps<float, float, float, float>, unsigned int, float, 4>)", "pid": 0, "tid": 7, "ts": 1742522672511096, "dur": 8, "args": {"External id": 13116, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 13116, "registers per thread": 32, "shared memory": 16, "blocks per SM": 0.007575758, "warps per SM": 0.121212125, "grid": [1, 1, 1], "block": [32, 16, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 13116, "pid": 0, "tid": 7, "ts": 1742522672511096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672430131, "dur": 5, "args": {"External id": 13116, "cbid": 211, "correlation": 13116}}, {"ph": "s", "id": 13116, "pid": 494, "tid": 494, "ts": 1742522672430131, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": 7, "ts": 1742522672511106, "dur": 3, "args": {"External id": 13130, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 13130, "registers per thread": 32, "shared memory": 16, "blocks per SM": 0.007575758, "warps per SM": 0.007575758, "grid": [1, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 13130, "pid": 0, "tid": 7, "ts": 1742522672511106, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672430148, "dur": 3, "args": {"External id": 13130, "cbid": 211, "correlation": 13130}}, {"ph": "s", "id": 13130, "pid": 494, "tid": 494, "ts": 1742522672430148, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672430159, "dur": 1, "args": {"External id": 13134, "cbid": 138, "correlation": 13134}}, {"ph": "f", "id": 13134, "pid": 494, "tid": 494, "ts": 1742522672430159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672511115, "dur": 2, "args": {"External id": 13138, "device": 0, "context": 1, "stream": 7, "correlation": 13138, "bytes": 4, "memory bandwidth (GB/s)": 0.0017605633802816902}}, {"ph": "f", "id": 13138, "pid": 0, "tid": 7, "ts": 1742522672511115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672430164, "dur": 9, "args": {"External id": 13138, "cbid": 41, "correlation": 13138}}, {"ph": "s", "id": 13138, "pid": 494, "tid": 494, "ts": 1742522672430164, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::unrolled_elementwise_kernel<at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1> >(int, at::native::direct_copy_kernel_cuda(at::TensorIteratorBase&)::{lambda()#2}::operator()() const::{lambda()#7}::operator()() const::{lambda(float)#1}, at::detail::Array<char*, 2>, TrivialOffsetCalculator<1, unsigned int>, TrivialOffsetCalculator<1, unsigned int>, at::native::memory::LoadWithCast<1>, at::native::memory::StoreWithCast<1>)", "pid": 0, "tid": 7, "ts": 1742522672511124, "dur": 2, "args": {"External id": 13150, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 13150, "registers per thread": 32, "shared memory": 0, "blocks per SM": 0.11363637, "warps per SM": 0.45454547, "grid": [15, 1, 1], "block": [128, 1, 1], "est. achieved occupancy %": 1}}, {"ph": "f", "id": 13150, "pid": 0, "tid": 7, "ts": 1742522672511124, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672430183, "dur": 3, "args": {"External id": 13150, "cbid": 211, "correlation": 13150}}, {"ph": "s", "id": 13150, "pid": 494, "tid": 494, "ts": 1742522672430183, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<float, at::native::MaxOps<float>, unsigned int, float, 4> >(at::native::ReduceOp<float, at::native::MaxOps<float>, unsigned int, float, 4>)", "pid": 0, "tid": 7, "ts": 1742522672511128, "dur": 16, "args": {"External id": 13168, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 13168, "registers per thread": 32, "shared memory": 16, "blocks per SM": 0.007575758, "warps per SM": 0.121212125, "grid": [1, 1, 1], "block": [32, 16, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 13168, "pid": 0, "tid": 7, "ts": 1742522672511128, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672430210, "dur": 4, "args": {"External id": 13168, "cbid": 211, "correlation": 13168}}, {"ph": "s", "id": 13168, "pid": 494, "tid": 494, "ts": 1742522672430210, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "kernel", "name": "void at::native::reduce_kernel<512, 1, at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4> >(at::native::ReduceOp<float, at::native::func_wrapper_t<float, at::native::sum_functor<float, float, float>::operator()(at::TensorIterator&)::{lambda(float, float)#1}>, unsigned int, float, 4>)", "pid": 0, "tid": 7, "ts": 1742522672511146, "dur": 2, "args": {"External id": 13182, "queued": 0, "device": 0, "context": 1, "stream": 7, "correlation": 13182, "registers per thread": 32, "shared memory": 16, "blocks per SM": 0.007575758, "warps per SM": 0.007575758, "grid": [1, 1, 1], "block": [32, 1, 1], "est. achieved occupancy %": 0}}, {"ph": "f", "id": 13182, "pid": 0, "tid": 7, "ts": 1742522672511146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaLaunchKernel", "pid": 494, "tid": 494, "ts": 1742522672430226, "dur": 3, "args": {"External id": 13182, "cbid": 211, "correlation": 13182}}, {"ph": "s", "id": 13182, "pid": 494, "tid": 494, "ts": 1742522672430226, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaEventQuery", "pid": 494, "tid": 494, "ts": 1742522672430234, "dur": 0, "args": {"External id": 13186, "cbid": 138, "correlation": 13186}}, {"ph": "f", "id": 13186, "pid": 494, "tid": 494, "ts": 1742522672430234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "gpu_memcpy", "name": "Memcpy DtoH (Device -> Pinned)", "pid": 0, "tid": 7, "ts": 1742522672511156, "dur": 2, "args": {"External id": 13190, "device": 0, "context": 1, "stream": 7, "correlation": 13190, "bytes": 4, "memory bandwidth (GB/s)": 0.0018656716417910447}}, {"ph": "f", "id": 13190, "pid": 0, "tid": 7, "ts": 1742522672511156, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaMemcpyAsync", "pid": 494, "tid": 494, "ts": 1742522672430237, "dur": 4, "args": {"External id": 13190, "cbid": 41, "correlation": 13190}}, {"ph": "s", "id": 13190, "pid": 494, "tid": 494, "ts": 1742522672430237, "cat": "ac2g", "name": "ac2g"}, {"ph": "X", "cat": "cuda_runtime", "name": "cudaStreamSynchronize", "pid": 494, "tid": 494, "ts": 1742522672430293, "dur": 80892, "args": {"External id": 13201, "cbid": 131, "correlation": 13201}}, {"ph": "s", "id": 13201, "pid": 494, "tid": 494, "ts": 1742522672430293, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13267, "pid": 494, "tid": 494, "ts": 1742522672512767, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13274, "pid": 0, "tid": 7, "ts": 1742522672512836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13274, "pid": 494, "tid": 494, "ts": 1742522672512806, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13275, "pid": 494, "tid": 494, "ts": 1742522672512837, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13279, "pid": 494, "tid": 494, "ts": 1742522672512945, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13285, "pid": 494, "tid": 494, "ts": 1742522672512969, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13289, "pid": 494, "tid": 494, "ts": 1742522672513009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13294, "pid": 494, "tid": 494, "ts": 1742522672513030, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13296, "pid": 494, "tid": 494, "ts": 1742522672513033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13297, "pid": 0, "tid": 13, "ts": 1742522672513106, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13300, "pid": 494, "tid": 494, "ts": 1742522672513118, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13319, "pid": 494, "tid": 494, "ts": 1742522672513150, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13326, "pid": 494, "tid": 494, "ts": 1742522672513200, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13335, "pid": 0, "tid": 7, "ts": 1742522672513358, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13335, "pid": 494, "tid": 494, "ts": 1742522672513223, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13336, "pid": 494, "tid": 494, "ts": 1742522672513365, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13347, "pid": 494, "tid": 494, "ts": 1742522672513423, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13354, "pid": 0, "tid": 7, "ts": 1742522672513482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13354, "pid": 494, "tid": 494, "ts": 1742522672513446, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13359, "pid": 494, "tid": 494, "ts": 1742522672513492, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13365, "pid": 494, "tid": 494, "ts": 1742522672513499, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13369, "pid": 494, "tid": 494, "ts": 1742522672513507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13374, "pid": 494, "tid": 494, "ts": 1742522672513514, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13376, "pid": 494, "tid": 494, "ts": 1742522672513516, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13377, "pid": 0, "tid": 13, "ts": 1742522672513545, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13380, "pid": 494, "tid": 494, "ts": 1742522672513549, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13399, "pid": 494, "tid": 494, "ts": 1742522672513563, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13406, "pid": 494, "tid": 494, "ts": 1742522672513577, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13415, "pid": 0, "tid": 7, "ts": 1742522672513663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13415, "pid": 494, "tid": 494, "ts": 1742522672513587, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13416, "pid": 494, "tid": 494, "ts": 1742522672513670, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13428, "pid": 494, "tid": 494, "ts": 1742522672514003, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13435, "pid": 0, "tid": 7, "ts": 1742522672514026, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13435, "pid": 494, "tid": 494, "ts": 1742522672514017, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13436, "pid": 494, "tid": 494, "ts": 1742522672514025, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13449, "pid": 0, "tid": 7, "ts": 1742522672514045, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13449, "pid": 494, "tid": 494, "ts": 1742522672514039, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13450, "pid": 494, "tid": 494, "ts": 1742522672514043, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13463, "pid": 0, "tid": 7, "ts": 1742522672514061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13463, "pid": 494, "tid": 494, "ts": 1742522672514056, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13464, "pid": 494, "tid": 494, "ts": 1742522672514059, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13477, "pid": 0, "tid": 7, "ts": 1742522672514076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13477, "pid": 494, "tid": 494, "ts": 1742522672514071, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13478, "pid": 494, "tid": 494, "ts": 1742522672514075, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13491, "pid": 0, "tid": 7, "ts": 1742522672514095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13491, "pid": 494, "tid": 494, "ts": 1742522672514088, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13492, "pid": 494, "tid": 494, "ts": 1742522672514093, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13505, "pid": 0, "tid": 7, "ts": 1742522672514113, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13505, "pid": 494, "tid": 494, "ts": 1742522672514108, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13506, "pid": 494, "tid": 494, "ts": 1742522672514111, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13527, "pid": 0, "tid": 7, "ts": 1742522672514133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13527, "pid": 494, "tid": 494, "ts": 1742522672514128, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13528, "pid": 494, "tid": 494, "ts": 1742522672514132, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13541, "pid": 0, "tid": 7, "ts": 1742522672514271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13541, "pid": 494, "tid": 494, "ts": 1742522672514264, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13542, "pid": 494, "tid": 494, "ts": 1742522672514269, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13555, "pid": 0, "tid": 7, "ts": 1742522672514287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13555, "pid": 494, "tid": 494, "ts": 1742522672514282, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13556, "pid": 494, "tid": 494, "ts": 1742522672514286, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13569, "pid": 0, "tid": 7, "ts": 1742522672514304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13569, "pid": 494, "tid": 494, "ts": 1742522672514299, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13570, "pid": 494, "tid": 494, "ts": 1742522672514302, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13591, "pid": 0, "tid": 7, "ts": 1742522672514324, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13591, "pid": 494, "tid": 494, "ts": 1742522672514320, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13592, "pid": 494, "tid": 494, "ts": 1742522672514323, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13613, "pid": 0, "tid": 7, "ts": 1742522672514347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13613, "pid": 494, "tid": 494, "ts": 1742522672514342, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13614, "pid": 494, "tid": 494, "ts": 1742522672514345, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13625, "pid": 0, "tid": 7, "ts": 1742522672514544, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13625, "pid": 494, "tid": 494, "ts": 1742522672514528, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13642, "pid": 0, "tid": 7, "ts": 1742522672514569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13642, "pid": 494, "tid": 494, "ts": 1742522672514562, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13659, "pid": 0, "tid": 7, "ts": 1742522672514587, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13659, "pid": 494, "tid": 494, "ts": 1742522672514580, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13676, "pid": 0, "tid": 7, "ts": 1742522672514606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13676, "pid": 494, "tid": 494, "ts": 1742522672514599, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13696, "pid": 0, "tid": 7, "ts": 1742522672514625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13696, "pid": 494, "tid": 494, "ts": 1742522672514619, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13722, "pid": 0, "tid": 7, "ts": 1742522672514684, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13722, "pid": 494, "tid": 494, "ts": 1742522672514647, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13735, "pid": 0, "tid": 7, "ts": 1742522672514712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13735, "pid": 494, "tid": 494, "ts": 1742522672514704, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13753, "pid": 0, "tid": 7, "ts": 1742522672514757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13753, "pid": 494, "tid": 494, "ts": 1742522672514749, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13754, "pid": 494, "tid": 494, "ts": 1742522672514764, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13760, "pid": 0, "tid": 7, "ts": 1742522672514803, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13760, "pid": 494, "tid": 494, "ts": 1742522672514798, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 13761, "pid": 494, "tid": 494, "ts": 1742522672514802, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13771, "pid": 494, "tid": 494, "ts": 1742522672514825, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13775, "pid": 494, "tid": 494, "ts": 1742522672514827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13778, "pid": 0, "tid": 7, "ts": 1742522672514861, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13778, "pid": 494, "tid": 494, "ts": 1742522672514832, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13784, "pid": 0, "tid": 7, "ts": 1742522672514869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13784, "pid": 494, "tid": 494, "ts": 1742522672514864, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672514928, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672514929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672514941, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672514946, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672514961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672514964, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672514966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672514994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515024, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515034, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515513, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515574, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515630, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515806, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515898, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672515968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672516435, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672516465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672516491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672516500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672518982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672518987, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672518992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519128, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519226, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519248, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519281, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519852, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519907, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672519918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520157, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520174, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520179, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520184, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520188, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520207, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520209, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520247, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520256, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520509, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520541, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520591, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520615, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520633, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520686, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672520735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521092, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521100, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521172, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521429, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521501, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521512, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521525, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521568, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521590, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521643, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521853, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672521874, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522127, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522143, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522213, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522216, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522223, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522241, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522245, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522264, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522285, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522305, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522342, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522359, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522856, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522872, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522927, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522938, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522941, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522943, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522951, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522972, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522991, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672522994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523012, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523017, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523032, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523045, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523070, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523156, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523235, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523305, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523631, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523642, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523645, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523648, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523720, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523736, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523751, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523753, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523791, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523799, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523956, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672523997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524017, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524345, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524362, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524410, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524413, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524435, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524488, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524513, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524579, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524665, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524705, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524724, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672524995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525000, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525057, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525064, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525070, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525098, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525119, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525137, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525142, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525157, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525175, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525195, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525418, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525684, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525837, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525855, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525859, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672525937, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526006, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526134, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526398, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526410, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526426, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526501, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526509, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526530, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526574, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526603, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526608, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526652, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526801, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672526854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527098, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527131, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527184, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527188, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527197, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527207, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527247, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527269, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527284, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527302, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527305, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527307, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527326, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527343, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527351, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527508, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527517, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527822, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527907, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527921, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527973, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672527996, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528006, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528011, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528024, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528065, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528074, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528137, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528214, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528223, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528555, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528611, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528628, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528675, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528700, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528753, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528934, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672528997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529241, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529252, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529268, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529324, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529335, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529348, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529366, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529413, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529429, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529447, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529466, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529483, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529709, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672529985, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530040, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530060, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530100, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530122, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530141, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530157, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530197, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530205, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530272, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530357, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530397, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530418, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530693, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530748, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530758, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530760, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530811, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530862, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530904, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672530986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531393, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531587, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531823, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672531844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532120, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532170, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532174, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532184, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532215, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532255, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532293, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532330, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532405, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532480, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532487, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532541, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532819, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532885, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532914, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532953, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532973, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672532991, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533196, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533244, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533508, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533519, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533536, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533541, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533603, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533683, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533693, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533736, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533753, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533761, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533908, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533916, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533956, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672533978, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534233, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534249, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534254, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534305, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534310, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534316, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534319, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534322, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534330, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534351, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534405, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534423, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534426, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534447, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534473, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534628, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534689, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534944, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672534966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535018, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535022, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535032, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535034, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535060, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535063, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535086, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535104, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535108, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535119, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535124, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535137, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535142, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535179, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535188, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535246, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535325, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535334, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535375, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535396, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535724, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535738, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535748, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535787, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535790, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535828, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672535960, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536355, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536384, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536451, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536454, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536456, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536483, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536503, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536525, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536529, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536540, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536545, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536563, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536753, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536761, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536801, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672536825, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537080, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537102, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537156, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537165, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537197, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537252, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537516, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537781, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537814, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537876, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537880, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537890, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537908, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537934, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537956, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537971, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672537990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538035, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538245, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538488, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538499, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538572, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538596, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538676, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538739, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538804, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538881, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538921, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672538942, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539184, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539196, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539268, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539272, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539278, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539284, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539292, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539310, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539331, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539335, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539353, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539357, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539386, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539411, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539436, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539596, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539664, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539907, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539919, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539940, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539991, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672539995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540005, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540057, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540075, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540090, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540113, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540305, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540354, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540630, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540652, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540708, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540720, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540803, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540822, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540846, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672540934, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541014, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541326, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541354, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541359, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541410, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541421, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541434, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541473, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541495, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541499, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541514, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541533, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541553, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541645, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541725, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672541798, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542067, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542072, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542123, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542127, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542134, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542137, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542147, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542165, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542190, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542245, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542357, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542437, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542761, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542782, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542838, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542850, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542937, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542951, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542953, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542975, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672542993, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543073, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543153, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543202, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543477, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543494, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543499, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543555, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543618, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543636, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543655, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543669, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543711, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543781, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543859, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543908, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672543929, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544205, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544261, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544281, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544322, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544326, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544343, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544358, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544363, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544379, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544418, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544572, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544643, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544886, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544897, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544913, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544919, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544983, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672544994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545012, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545055, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545059, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545069, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545074, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545088, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545090, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545092, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545138, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545207, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545288, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545297, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545358, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545613, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545629, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545634, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545685, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545689, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545697, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545700, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545702, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545804, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545806, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545853, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672545920, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546000, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546072, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546325, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546398, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546443, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546503, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546516, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546519, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546540, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546566, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546628, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546708, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672546776, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547032, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547118, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547121, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547129, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547226, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547246, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547339, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547475, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547801, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547805, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547825, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547846, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547864, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547886, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547890, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547906, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547919, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547943, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547960, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672547968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548134, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548188, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548429, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548441, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548456, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548462, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548512, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548517, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548529, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548612, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548630, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548633, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548762, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548840, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548880, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672548901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549144, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549227, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549231, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549241, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549244, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549269, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549317, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549333, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549351, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549397, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549629, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549898, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549903, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549964, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549978, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549995, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672549999, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550018, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550021, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550043, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550053, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550074, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550113, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550122, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550186, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550334, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550610, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550665, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550675, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550678, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550685, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550703, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550707, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550725, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550746, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550760, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550765, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550779, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550781, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550803, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550898, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672550987, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551291, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551302, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551324, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551375, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551380, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551386, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551400, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551418, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551421, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551439, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551460, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551475, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672551756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552000, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552011, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552107, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552125, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552128, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552149, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552172, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552182, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552201, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552203, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552205, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552241, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552322, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552411, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552451, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552730, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552746, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552751, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552802, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552806, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552816, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552819, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552865, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552891, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552906, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552919, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552943, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552960, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672552969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553117, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553179, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553434, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553450, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553517, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553530, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553591, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553611, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553629, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553649, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553675, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553739, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553820, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672553890, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554144, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554160, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554221, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554231, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554234, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554241, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554259, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554302, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554306, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554316, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554335, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554339, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554359, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554384, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554874, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554941, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554945, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554947, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554973, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554976, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672554997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555016, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555020, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555035, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555049, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555052, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555074, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555160, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555245, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555278, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555568, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555628, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555648, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555669, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555724, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555747, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555767, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555874, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672555964, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556026, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556269, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556281, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556297, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556302, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556354, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556358, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556364, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556396, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556399, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556418, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556421, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556439, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556443, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556454, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556458, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556472, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556513, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556664, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672556990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557007, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557012, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557072, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557075, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557106, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557124, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557127, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557160, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557165, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557178, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557181, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557203, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557228, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557292, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557420, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557441, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557683, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557711, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557767, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557771, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557791, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557830, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557852, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557856, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557885, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557890, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557927, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672557935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558000, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558118, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558382, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558393, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558469, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558585, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558587, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558711, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558828, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672558849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559120, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559125, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559190, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559218, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559221, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559276, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559281, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559297, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559299, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559319, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559336, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559344, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559420, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559800, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559811, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559899, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559907, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559928, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559946, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559968, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559972, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672559987, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560000, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560004, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560288, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560559, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560628, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560631, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560700, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560705, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560720, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560774, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560783, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560921, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560928, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560960, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672560979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561248, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561308, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561317, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561320, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561346, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561393, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561403, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561425, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561446, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561686, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561707, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561977, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672561982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562034, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562038, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562044, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562047, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562075, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562098, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562120, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562124, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562135, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562153, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562156, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562202, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562340, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562348, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562380, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562399, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562724, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562738, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562748, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562791, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562810, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562814, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562824, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672562979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563070, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563110, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563131, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563373, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563385, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563401, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563458, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563462, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563499, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563502, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563524, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563542, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563577, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563579, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563599, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563775, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672563828, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564070, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564098, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564103, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564166, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564179, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564196, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564257, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564276, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564295, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564473, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564811, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564872, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564879, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564913, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564934, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564956, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564967, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564972, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564985, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564988, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672564990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565036, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565201, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565241, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565504, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565532, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565592, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565598, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565611, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565630, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565633, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565706, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565708, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565748, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565819, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565897, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565906, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565946, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672565966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566209, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566220, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566236, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566293, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566297, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566307, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566317, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566335, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566338, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566359, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566382, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566397, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566411, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566413, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566415, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566436, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566453, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566532, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566613, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566685, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566928, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566939, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672566961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567012, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567016, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567026, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567037, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567057, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567079, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567116, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567135, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567154, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567179, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567329, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567633, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567645, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567661, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567722, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567761, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567782, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567786, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567804, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567823, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567839, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567886, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672567955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568038, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568070, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568344, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568366, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568421, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568434, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568460, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568504, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568508, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568541, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568741, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568748, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568781, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672568801, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569043, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569055, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569077, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569128, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569132, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569142, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569170, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569196, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569215, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569219, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569235, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569248, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569456, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569504, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569796, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569802, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569864, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569870, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569899, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569917, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569921, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569939, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569943, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569959, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569972, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569975, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569977, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672569996, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570022, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570170, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570231, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570473, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570501, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570572, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570575, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570623, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570626, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570648, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570701, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570726, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672570945, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571188, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571216, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571284, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571340, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571358, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571362, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571393, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571395, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571587, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571648, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571917, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571974, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571978, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571984, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571987, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672571998, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572016, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572019, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572038, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572058, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572073, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572078, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572115, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572132, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572221, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572300, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572611, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572706, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572709, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572712, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572740, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572762, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572800, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572814, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572816, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572839, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572856, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572864, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672572934, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573063, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573340, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573416, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573422, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573425, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573436, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573453, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573456, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573475, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573478, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573529, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573532, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573534, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573578, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573648, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573737, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672573797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574066, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574072, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574122, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574147, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574164, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574186, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574207, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574211, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574221, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574226, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574245, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574282, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574353, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574441, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574502, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574771, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574828, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574832, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574840, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574853, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574874, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574894, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574897, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574915, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574919, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574972, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672574997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575160, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575223, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575476, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575497, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575552, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575559, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575562, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575590, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575612, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575615, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575634, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575648, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575669, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575707, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575857, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575864, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575904, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672575926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576167, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576179, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576195, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576256, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576268, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576276, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576297, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576341, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576351, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576356, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576374, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576394, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576411, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576419, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576881, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576909, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576914, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576966, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576970, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576976, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576982, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672576990, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577011, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577032, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577050, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577055, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577065, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577070, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577107, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577124, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577133, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577278, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577286, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577325, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577684, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577690, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577697, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577736, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577739, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577762, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577772, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577790, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577832, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577840, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577903, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577979, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672577987, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578019, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578040, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578310, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578316, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578371, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578378, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578381, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578383, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578451, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578455, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578466, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578471, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578484, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578487, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578509, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578526, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578534, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578616, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578702, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672578998, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579026, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579031, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579099, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579107, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579125, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579128, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579148, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579151, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579188, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579202, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579206, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579326, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579413, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579453, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579751, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579802, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579806, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579815, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579869, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579888, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579892, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579903, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579907, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579921, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579925, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579946, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672579971, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580122, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580130, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580170, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580436, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580447, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580519, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580529, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580532, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580535, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580608, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580619, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580637, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580642, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580679, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580687, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580747, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580823, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580830, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672580884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581125, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581153, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581212, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581219, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581272, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581308, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581313, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581327, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581329, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581331, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581352, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581542, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581591, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581612, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581938, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581942, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581949, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581955, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581963, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581981, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672581984, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582005, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582037, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582042, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582056, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582059, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582081, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582098, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582106, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582254, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582262, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582303, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582577, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582598, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582674, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582691, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582713, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582716, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582738, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582754, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582767, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582772, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582962, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672582969, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583021, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583264, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583276, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583292, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583297, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583348, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583352, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583359, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583362, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583372, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583393, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583415, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583437, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583453, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583466, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583468, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583507, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583516, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583591, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583722, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583744, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672583997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584018, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584075, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584082, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584088, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584113, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584117, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584135, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584138, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584157, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584161, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584171, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584192, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584213, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584304, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584385, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584393, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584453, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584697, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584708, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584725, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584730, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584781, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584785, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584797, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584805, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584823, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584845, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584848, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584867, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584871, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584881, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584886, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584900, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584902, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584904, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584941, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672584950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585087, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585401, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585434, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585489, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585499, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585502, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585509, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585530, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585569, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585584, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585602, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585627, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585653, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585796, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585804, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672585866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586107, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586118, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586134, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586190, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586195, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586201, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586204, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586207, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586214, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586235, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586256, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586279, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586307, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586310, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586331, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586348, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586357, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586508, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586893, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586897, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586904, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586907, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586909, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586917, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586938, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586956, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586959, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586977, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586981, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672586996, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587014, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587034, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587060, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587136, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587217, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587285, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587555, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587610, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587624, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587626, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587634, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587651, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587655, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587674, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587677, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587728, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587855, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587940, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587973, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672587994, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588236, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588248, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588321, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588325, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588335, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588345, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588364, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588385, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588410, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588421, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588426, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588439, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588482, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588649, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588689, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588711, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588965, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588981, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672588986, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589037, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589041, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589062, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589080, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589104, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589122, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589137, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589141, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589155, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589160, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589179, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589196, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589205, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589351, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589400, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589419, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589693, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589756, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589759, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589761, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589787, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589790, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589808, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589811, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589830, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589844, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589866, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589887, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589904, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672589985, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590060, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590068, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590122, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590377, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590393, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590398, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590459, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590462, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590464, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590472, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590514, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590532, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590536, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590551, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590568, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590589, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590781, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590821, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672590842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591118, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591173, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591179, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591182, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591185, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591214, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591233, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591236, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591254, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591259, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591269, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591288, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591292, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591329, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591337, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591801, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591885, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591896, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591899, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591902, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591910, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591928, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591931, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591953, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591972, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591976, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591987, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672591992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592005, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592010, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592046, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592119, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592200, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592208, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592249, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592269, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592522, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592539, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592544, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592596, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592600, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592612, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592620, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592660, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592681, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592685, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592700, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592739, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592765, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592828, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592908, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592916, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672592977, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593218, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593230, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593246, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593301, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593306, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593315, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593318, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593326, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593343, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593347, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593366, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593402, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593407, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593420, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593423, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593425, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593462, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593533, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593613, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593622, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593924, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593935, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593952, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672593958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594013, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594019, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594022, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594025, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594033, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594072, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594075, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594093, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594108, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594129, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594131, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594151, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594176, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594243, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594322, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594329, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594392, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594647, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594718, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594761, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594764, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594782, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594785, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594803, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594807, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594818, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594822, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594838, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594840, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594860, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594886, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672594954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595038, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595071, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595344, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595360, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595416, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595420, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595427, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595430, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595440, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595458, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595480, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595483, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595501, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595515, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595520, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595534, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595536, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595574, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595583, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595751, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595760, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595800, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672595819, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596061, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596072, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596089, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596094, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596150, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596156, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596162, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596187, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596210, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596213, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596232, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596236, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596247, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596265, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596267, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596270, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596306, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596451, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596459, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596492, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596513, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596782, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596788, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596839, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596849, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596852, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596855, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596863, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596880, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596884, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596904, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596923, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596927, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596938, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596942, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596956, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596959, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596981, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672596998, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597006, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597083, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597161, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597168, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597201, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597221, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597550, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597563, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597570, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597592, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597612, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597631, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597646, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597664, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597705, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597713, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597873, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597882, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597922, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672597943, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598186, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598197, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598213, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598219, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598269, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598285, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598293, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598311, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598314, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598333, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598336, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598354, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598358, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598368, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598373, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598389, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598411, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598436, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598506, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598635, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598657, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598912, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598928, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598933, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598985, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598989, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598996, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672598999, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599002, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599009, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599027, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599030, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599048, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599051, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599069, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599074, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599084, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599089, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599102, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599107, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599126, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599143, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599152, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599218, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599306, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599346, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599367, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599637, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599642, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599698, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599707, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599738, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599757, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599760, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599780, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599794, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599799, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599812, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599814, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599816, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599853, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599861, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672599926, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600006, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600015, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600054, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600076, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600317, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600328, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600344, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600350, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600401, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600405, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600411, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600414, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600417, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600463, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600486, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600523, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600560, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600568, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600632, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600710, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600750, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672600771, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601012, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601023, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601039, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601045, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601096, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601100, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601106, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601109, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601112, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601120, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601137, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601140, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601158, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601161, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601180, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601184, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601199, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601213, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601215, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601218, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601237, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601254, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601263, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601348, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601425, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601465, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601738, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601754, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601760, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601823, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601826, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601829, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601857, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601875, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601878, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601897, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601901, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601911, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601916, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601930, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601932, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601934, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601954, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601971, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672601980, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602055, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602135, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602143, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602183, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602205, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602446, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602458, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602532, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602536, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602548, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602556, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602574, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602577, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602599, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602617, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602631, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602769, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602850, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602883, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672602904, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672603268, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672603271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672603275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672603279, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672603283, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672603285, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672603287, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672603289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672603290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672603299, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13789, "pid": 0, "tid": 7, "ts": 1742522672603309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13789, "pid": 494, "tid": 494, "ts": 1742522672514869, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13790, "pid": 494, "tid": 494, "ts": 1742522672518961, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13815, "pid": 0, "tid": 7, "ts": 1742522672603320, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13815, "pid": 494, "tid": 494, "ts": 1742522672519242, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13833, "pid": 0, "tid": 7, "ts": 1742522672603326, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13833, "pid": 494, "tid": 494, "ts": 1742522672519371, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13846, "pid": 494, "tid": 494, "ts": 1742522672519447, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13847, "pid": 0, "tid": 7, "ts": 1742522672603340, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13847, "pid": 494, "tid": 494, "ts": 1742522672519472, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13848, "pid": 494, "tid": 494, "ts": 1742522672519487, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13849, "pid": 494, "tid": 494, "ts": 1742522672519490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13850, "pid": 494, "tid": 494, "ts": 1742522672519491, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13851, "pid": 0, "tid": 7, "ts": 1742522672603343, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13851, "pid": 494, "tid": 494, "ts": 1742522672519492, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13856, "pid": 494, "tid": 494, "ts": 1742522672519694, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13857, "pid": 494, "tid": 494, "ts": 1742522672519699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13858, "pid": 494, "tid": 494, "ts": 1742522672519702, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13859, "pid": 494, "tid": 494, "ts": 1742522672519705, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13860, "pid": 494, "tid": 494, "ts": 1742522672519707, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13861, "pid": 494, "tid": 494, "ts": 1742522672519709, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13862, "pid": 494, "tid": 494, "ts": 1742522672519711, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13863, "pid": 494, "tid": 494, "ts": 1742522672519713, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13864, "pid": 494, "tid": 494, "ts": 1742522672519715, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13865, "pid": 494, "tid": 494, "ts": 1742522672519717, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13866, "pid": 494, "tid": 494, "ts": 1742522672519719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13867, "pid": 494, "tid": 494, "ts": 1742522672519721, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13868, "pid": 494, "tid": 494, "ts": 1742522672519723, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13869, "pid": 494, "tid": 494, "ts": 1742522672519725, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13870, "pid": 494, "tid": 494, "ts": 1742522672519727, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13871, "pid": 494, "tid": 494, "ts": 1742522672519729, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13872, "pid": 494, "tid": 494, "ts": 1742522672519731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13873, "pid": 494, "tid": 494, "ts": 1742522672519733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13874, "pid": 494, "tid": 494, "ts": 1742522672519735, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 13884, "pid": 0, "tid": 7, "ts": 1742522672604106, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13884, "pid": 494, "tid": 494, "ts": 1742522672519824, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13896, "pid": 0, "tid": 7, "ts": 1742522672604111, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13896, "pid": 494, "tid": 494, "ts": 1742522672519846, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13908, "pid": 0, "tid": 7, "ts": 1742522672604117, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13908, "pid": 494, "tid": 494, "ts": 1742522672519858, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13920, "pid": 0, "tid": 7, "ts": 1742522672604122, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13920, "pid": 494, "tid": 494, "ts": 1742522672519870, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13932, "pid": 0, "tid": 7, "ts": 1742522672604127, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13932, "pid": 494, "tid": 494, "ts": 1742522672519881, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13944, "pid": 0, "tid": 7, "ts": 1742522672604132, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13944, "pid": 494, "tid": 494, "ts": 1742522672519892, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13984, "pid": 0, "tid": 7, "ts": 1742522672604139, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13984, "pid": 494, "tid": 494, "ts": 1742522672520292, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 13995, "pid": 0, "tid": 7, "ts": 1742522672604146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 13995, "pid": 494, "tid": 494, "ts": 1742522672520345, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14015, "pid": 0, "tid": 7, "ts": 1742522672604153, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14015, "pid": 494, "tid": 494, "ts": 1742522672520385, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14034, "pid": 494, "tid": 494, "ts": 1742522672520408, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14035, "pid": 0, "tid": 7, "ts": 1742522672604225, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14035, "pid": 494, "tid": 494, "ts": 1742522672520410, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14046, "pid": 0, "tid": 7, "ts": 1742522672604384, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14046, "pid": 494, "tid": 494, "ts": 1742522672520437, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14064, "pid": 0, "tid": 7, "ts": 1742522672604390, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14064, "pid": 494, "tid": 494, "ts": 1742522672520470, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14070, "pid": 0, "tid": 7, "ts": 1742522672604393, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14070, "pid": 494, "tid": 494, "ts": 1742522672520484, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14076, "pid": 0, "tid": 7, "ts": 1742522672604400, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14076, "pid": 494, "tid": 494, "ts": 1742522672520493, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14081, "pid": 0, "tid": 7, "ts": 1742522672604404, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14081, "pid": 494, "tid": 494, "ts": 1742522672520506, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14087, "pid": 0, "tid": 7, "ts": 1742522672604431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14087, "pid": 494, "tid": 494, "ts": 1742522672520521, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14105, "pid": 0, "tid": 7, "ts": 1742522672604442, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14105, "pid": 494, "tid": 494, "ts": 1742522672520539, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14118, "pid": 0, "tid": 7, "ts": 1742522672604446, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14118, "pid": 494, "tid": 494, "ts": 1742522672520578, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14128, "pid": 0, "tid": 7, "ts": 1742522672604459, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14128, "pid": 494, "tid": 494, "ts": 1742522672520602, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14139, "pid": 0, "tid": 7, "ts": 1742522672604467, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14139, "pid": 494, "tid": 494, "ts": 1742522672520630, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14149, "pid": 0, "tid": 7, "ts": 1742522672604474, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14149, "pid": 494, "tid": 494, "ts": 1742522672520648, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14162, "pid": 0, "tid": 7, "ts": 1742522672604479, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14162, "pid": 494, "tid": 494, "ts": 1742522672520677, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14168, "pid": 0, "tid": 7, "ts": 1742522672604502, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14168, "pid": 494, "tid": 494, "ts": 1742522672520690, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14187, "pid": 0, "tid": 7, "ts": 1742522672604527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14187, "pid": 494, "tid": 494, "ts": 1742522672520730, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14193, "pid": 0, "tid": 7, "ts": 1742522672604554, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14193, "pid": 494, "tid": 494, "ts": 1742522672520742, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14206, "pid": 0, "tid": 7, "ts": 1742522672604577, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14206, "pid": 494, "tid": 494, "ts": 1742522672520763, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14216, "pid": 0, "tid": 7, "ts": 1742522672604581, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14216, "pid": 494, "tid": 494, "ts": 1742522672520784, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14226, "pid": 0, "tid": 7, "ts": 1742522672604586, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14226, "pid": 494, "tid": 494, "ts": 1742522672520814, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14236, "pid": 0, "tid": 7, "ts": 1742522672604590, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14236, "pid": 494, "tid": 494, "ts": 1742522672520838, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14246, "pid": 0, "tid": 7, "ts": 1742522672604594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14246, "pid": 494, "tid": 494, "ts": 1742522672520860, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14259, "pid": 0, "tid": 7, "ts": 1742522672604597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14259, "pid": 494, "tid": 494, "ts": 1742522672520881, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14277, "pid": 0, "tid": 7, "ts": 1742522672604601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14277, "pid": 494, "tid": 494, "ts": 1742522672520905, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14287, "pid": 0, "tid": 7, "ts": 1742522672604605, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14287, "pid": 494, "tid": 494, "ts": 1742522672520924, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14305, "pid": 494, "tid": 494, "ts": 1742522672520950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14307, "pid": 0, "tid": 7, "ts": 1742522672604609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14307, "pid": 494, "tid": 494, "ts": 1742522672520953, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14317, "pid": 0, "tid": 7, "ts": 1742522672604613, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14317, "pid": 494, "tid": 494, "ts": 1742522672520973, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14327, "pid": 0, "tid": 7, "ts": 1742522672604618, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14327, "pid": 494, "tid": 494, "ts": 1742522672520991, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14337, "pid": 0, "tid": 7, "ts": 1742522672604639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14337, "pid": 494, "tid": 494, "ts": 1742522672521002, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14347, "pid": 0, "tid": 7, "ts": 1742522672604644, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14347, "pid": 494, "tid": 494, "ts": 1742522672521012, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14354, "pid": 0, "tid": 7, "ts": 1742522672604650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14354, "pid": 494, "tid": 494, "ts": 1742522672521030, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14370, "pid": 0, "tid": 7, "ts": 1742522672604654, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14370, "pid": 494, "tid": 494, "ts": 1742522672521054, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14380, "pid": 0, "tid": 7, "ts": 1742522672604667, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14380, "pid": 494, "tid": 494, "ts": 1742522672521074, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14398, "pid": 494, "tid": 494, "ts": 1742522672521097, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14400, "pid": 0, "tid": 7, "ts": 1742522672604673, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14400, "pid": 494, "tid": 494, "ts": 1742522672521100, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14406, "pid": 0, "tid": 7, "ts": 1742522672604680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14406, "pid": 494, "tid": 494, "ts": 1742522672521113, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14420, "pid": 0, "tid": 7, "ts": 1742522672604685, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14420, "pid": 494, "tid": 494, "ts": 1742522672521135, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14430, "pid": 0, "tid": 7, "ts": 1742522672604704, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14430, "pid": 494, "tid": 494, "ts": 1742522672521152, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14448, "pid": 0, "tid": 7, "ts": 1742522672604709, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14448, "pid": 494, "tid": 494, "ts": 1742522672521180, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14461, "pid": 0, "tid": 7, "ts": 1742522672604714, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14461, "pid": 494, "tid": 494, "ts": 1742522672521200, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14476, "pid": 0, "tid": 7, "ts": 1742522672604719, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14476, "pid": 494, "tid": 494, "ts": 1742522672521226, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14479, "pid": 494, "tid": 494, "ts": 1742522672521238, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14480, "pid": 494, "tid": 494, "ts": 1742522672521240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14481, "pid": 494, "tid": 494, "ts": 1742522672521240, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14482, "pid": 494, "tid": 494, "ts": 1742522672521241, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14483, "pid": 494, "tid": 494, "ts": 1742522672521242, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14484, "pid": 494, "tid": 494, "ts": 1742522672521247, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14485, "pid": 494, "tid": 494, "ts": 1742522672521250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14486, "pid": 494, "tid": 494, "ts": 1742522672521250, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14487, "pid": 494, "tid": 494, "ts": 1742522672521251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14488, "pid": 494, "tid": 494, "ts": 1742522672521251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14489, "pid": 494, "tid": 494, "ts": 1742522672521252, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14490, "pid": 494, "tid": 494, "ts": 1742522672521252, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14491, "pid": 494, "tid": 494, "ts": 1742522672521253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14493, "pid": 0, "tid": 7, "ts": 1742522672604755, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14493, "pid": 494, "tid": 494, "ts": 1742522672521255, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14503, "pid": 0, "tid": 7, "ts": 1742522672604762, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14503, "pid": 494, "tid": 494, "ts": 1742522672521269, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14516, "pid": 0, "tid": 7, "ts": 1742522672604770, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14516, "pid": 494, "tid": 494, "ts": 1742522672521286, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14534, "pid": 0, "tid": 7, "ts": 1742522672604773, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14534, "pid": 494, "tid": 494, "ts": 1742522672521301, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14546, "pid": 0, "tid": 7, "ts": 1742522672604777, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14546, "pid": 494, "tid": 494, "ts": 1742522672521318, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14558, "pid": 0, "tid": 7, "ts": 1742522672604781, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14558, "pid": 494, "tid": 494, "ts": 1742522672521338, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14569, "pid": 0, "tid": 7, "ts": 1742522672604784, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14569, "pid": 494, "tid": 494, "ts": 1742522672521357, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14584, "pid": 0, "tid": 7, "ts": 1742522672604789, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14584, "pid": 494, "tid": 494, "ts": 1742522672521374, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14597, "pid": 0, "tid": 7, "ts": 1742522672604792, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14597, "pid": 494, "tid": 494, "ts": 1742522672521383, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14615, "pid": 0, "tid": 7, "ts": 1742522672604795, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14615, "pid": 494, "tid": 494, "ts": 1742522672521395, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14626, "pid": 0, "tid": 7, "ts": 1742522672604800, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14626, "pid": 494, "tid": 494, "ts": 1742522672521405, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14652, "pid": 0, "tid": 7, "ts": 1742522672604803, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14652, "pid": 494, "tid": 494, "ts": 1742522672521429, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14663, "pid": 0, "tid": 7, "ts": 1742522672604809, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14663, "pid": 494, "tid": 494, "ts": 1742522672521441, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14674, "pid": 0, "tid": 7, "ts": 1742522672604813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14674, "pid": 494, "tid": 494, "ts": 1742522672521452, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14697, "pid": 0, "tid": 7, "ts": 1742522672604817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14697, "pid": 494, "tid": 494, "ts": 1742522672521469, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14715, "pid": 0, "tid": 7, "ts": 1742522672604822, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14715, "pid": 494, "tid": 494, "ts": 1742522672521488, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14725, "pid": 0, "tid": 7, "ts": 1742522672604827, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14725, "pid": 494, "tid": 494, "ts": 1742522672521498, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14735, "pid": 0, "tid": 7, "ts": 1742522672604833, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14735, "pid": 494, "tid": 494, "ts": 1742522672521525, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14747, "pid": 0, "tid": 7, "ts": 1742522672604836, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14747, "pid": 494, "tid": 494, "ts": 1742522672521543, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14762, "pid": 0, "tid": 7, "ts": 1742522672604839, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14762, "pid": 494, "tid": 494, "ts": 1742522672521559, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14780, "pid": 0, "tid": 7, "ts": 1742522672604842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14780, "pid": 494, "tid": 494, "ts": 1742522672521572, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14786, "pid": 0, "tid": 7, "ts": 1742522672604847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14786, "pid": 494, "tid": 494, "ts": 1742522672521581, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14805, "pid": 494, "tid": 494, "ts": 1742522672521597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14806, "pid": 0, "tid": 7, "ts": 1742522672604918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14806, "pid": 494, "tid": 494, "ts": 1742522672521599, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14817, "pid": 0, "tid": 7, "ts": 1742522672605085, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14817, "pid": 494, "tid": 494, "ts": 1742522672521614, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14835, "pid": 0, "tid": 7, "ts": 1742522672605091, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14835, "pid": 494, "tid": 494, "ts": 1742522672521633, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14841, "pid": 0, "tid": 7, "ts": 1742522672605095, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14841, "pid": 494, "tid": 494, "ts": 1742522672521642, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14847, "pid": 0, "tid": 7, "ts": 1742522672605101, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14847, "pid": 494, "tid": 494, "ts": 1742522672521650, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14852, "pid": 0, "tid": 7, "ts": 1742522672605105, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14852, "pid": 494, "tid": 494, "ts": 1742522672521657, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14858, "pid": 0, "tid": 7, "ts": 1742522672605132, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14858, "pid": 494, "tid": 494, "ts": 1742522672521666, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14876, "pid": 0, "tid": 7, "ts": 1742522672605144, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14876, "pid": 494, "tid": 494, "ts": 1742522672521681, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14889, "pid": 0, "tid": 7, "ts": 1742522672605147, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14889, "pid": 494, "tid": 494, "ts": 1742522672521704, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14899, "pid": 0, "tid": 7, "ts": 1742522672605160, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14899, "pid": 494, "tid": 494, "ts": 1742522672521718, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14910, "pid": 0, "tid": 7, "ts": 1742522672605169, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14910, "pid": 494, "tid": 494, "ts": 1742522672521733, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14920, "pid": 0, "tid": 7, "ts": 1742522672605177, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14920, "pid": 494, "tid": 494, "ts": 1742522672521750, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14938, "pid": 494, "tid": 494, "ts": 1742522672521766, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14940, "pid": 0, "tid": 7, "ts": 1742522672605182, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14940, "pid": 494, "tid": 494, "ts": 1742522672521767, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14946, "pid": 0, "tid": 7, "ts": 1742522672605189, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14946, "pid": 494, "tid": 494, "ts": 1742522672521777, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14960, "pid": 0, "tid": 7, "ts": 1742522672605195, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14960, "pid": 494, "tid": 494, "ts": 1742522672521792, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14973, "pid": 0, "tid": 7, "ts": 1742522672605214, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14973, "pid": 494, "tid": 494, "ts": 1742522672521811, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14988, "pid": 0, "tid": 7, "ts": 1742522672605218, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 14988, "pid": 494, "tid": 494, "ts": 1742522672521829, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 14991, "pid": 494, "tid": 494, "ts": 1742522672521834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14992, "pid": 494, "tid": 494, "ts": 1742522672521834, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14993, "pid": 494, "tid": 494, "ts": 1742522672521835, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14994, "pid": 494, "tid": 494, "ts": 1742522672521835, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14995, "pid": 494, "tid": 494, "ts": 1742522672521835, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14996, "pid": 494, "tid": 494, "ts": 1742522672521839, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14997, "pid": 494, "tid": 494, "ts": 1742522672521840, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14998, "pid": 494, "tid": 494, "ts": 1742522672521840, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 14999, "pid": 494, "tid": 494, "ts": 1742522672521841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 15000, "pid": 494, "tid": 494, "ts": 1742522672521841, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 15001, "pid": 494, "tid": 494, "ts": 1742522672521842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 15002, "pid": 494, "tid": 494, "ts": 1742522672521842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 15003, "pid": 494, "tid": 494, "ts": 1742522672521843, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 15005, "pid": 0, "tid": 7, "ts": 1742522672605253, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15005, "pid": 494, "tid": 494, "ts": 1742522672521844, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15015, "pid": 0, "tid": 7, "ts": 1742522672605260, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15015, "pid": 494, "tid": 494, "ts": 1742522672521858, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15025, "pid": 0, "tid": 7, "ts": 1742522672605266, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15025, "pid": 494, "tid": 494, "ts": 1742522672521871, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15046, "pid": 0, "tid": 7, "ts": 1742522672605271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15046, "pid": 494, "tid": 494, "ts": 1742522672521892, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15067, "pid": 0, "tid": 7, "ts": 1742522672605275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15067, "pid": 494, "tid": 494, "ts": 1742522672521911, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15085, "pid": 0, "tid": 7, "ts": 1742522672605281, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15085, "pid": 494, "tid": 494, "ts": 1742522672521925, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15103, "pid": 0, "tid": 7, "ts": 1742522672605285, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15103, "pid": 494, "tid": 494, "ts": 1742522672521937, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15114, "pid": 0, "tid": 7, "ts": 1742522672605290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15114, "pid": 494, "tid": 494, "ts": 1742522672521954, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15134, "pid": 0, "tid": 7, "ts": 1742522672605294, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15134, "pid": 494, "tid": 494, "ts": 1742522672522055, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15147, "pid": 0, "tid": 7, "ts": 1742522672605298, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15147, "pid": 494, "tid": 494, "ts": 1742522672522070, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15154, "pid": 0, "tid": 7, "ts": 1742522672605301, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15154, "pid": 494, "tid": 494, "ts": 1742522672522083, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15167, "pid": 0, "tid": 7, "ts": 1742522672605305, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15167, "pid": 494, "tid": 494, "ts": 1742522672522103, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15177, "pid": 0, "tid": 7, "ts": 1742522672605309, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15177, "pid": 494, "tid": 494, "ts": 1742522672522122, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15187, "pid": 0, "tid": 7, "ts": 1742522672605312, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15187, "pid": 494, "tid": 494, "ts": 1742522672522134, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15197, "pid": 0, "tid": 7, "ts": 1742522672605316, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15197, "pid": 494, "tid": 494, "ts": 1742522672522150, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15207, "pid": 0, "tid": 7, "ts": 1742522672605320, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15207, "pid": 494, "tid": 494, "ts": 1742522672522160, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15220, "pid": 0, "tid": 7, "ts": 1742522672605323, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15220, "pid": 494, "tid": 494, "ts": 1742522672522186, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15231, "pid": 0, "tid": 7, "ts": 1742522672605327, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15231, "pid": 494, "tid": 494, "ts": 1742522672522200, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15244, "pid": 0, "tid": 7, "ts": 1742522672605332, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15244, "pid": 494, "tid": 494, "ts": 1742522672522216, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15262, "pid": 0, "tid": 7, "ts": 1742522672605336, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15262, "pid": 494, "tid": 494, "ts": 1742522672522231, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15272, "pid": 0, "tid": 7, "ts": 1742522672605339, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15272, "pid": 494, "tid": 494, "ts": 1742522672522254, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15282, "pid": 0, "tid": 7, "ts": 1742522672605343, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15282, "pid": 494, "tid": 494, "ts": 1742522672522266, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15303, "pid": 0, "tid": 7, "ts": 1742522672605346, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15303, "pid": 494, "tid": 494, "ts": 1742522672522288, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15313, "pid": 0, "tid": 7, "ts": 1742522672605349, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15313, "pid": 494, "tid": 494, "ts": 1742522672522298, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15323, "pid": 0, "tid": 7, "ts": 1742522672605353, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15323, "pid": 494, "tid": 494, "ts": 1742522672522308, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15333, "pid": 0, "tid": 7, "ts": 1742522672605357, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15333, "pid": 494, "tid": 494, "ts": 1742522672522316, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15346, "pid": 0, "tid": 7, "ts": 1742522672605361, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15346, "pid": 494, "tid": 494, "ts": 1742522672522335, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15357, "pid": 0, "tid": 7, "ts": 1742522672605366, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15357, "pid": 494, "tid": 494, "ts": 1742522672522347, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15370, "pid": 0, "tid": 7, "ts": 1742522672605369, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15370, "pid": 494, "tid": 494, "ts": 1742522672522373, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15380, "pid": 0, "tid": 7, "ts": 1742522672605374, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15380, "pid": 494, "tid": 494, "ts": 1742522672522389, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15390, "pid": 0, "tid": 7, "ts": 1742522672605379, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15390, "pid": 494, "tid": 494, "ts": 1742522672522402, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15400, "pid": 0, "tid": 7, "ts": 1742522672605382, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15400, "pid": 494, "tid": 494, "ts": 1742522672522425, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15410, "pid": 0, "tid": 7, "ts": 1742522672605387, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15410, "pid": 494, "tid": 494, "ts": 1742522672522436, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15422, "pid": 0, "tid": 7, "ts": 1742522672605391, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15422, "pid": 494, "tid": 494, "ts": 1742522672522461, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15439, "pid": 0, "tid": 7, "ts": 1742522672605395, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15439, "pid": 494, "tid": 494, "ts": 1742522672522509, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15452, "pid": 0, "tid": 7, "ts": 1742522672605406, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15452, "pid": 494, "tid": 494, "ts": 1742522672522540, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15470, "pid": 0, "tid": 7, "ts": 1742522672605409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15470, "pid": 494, "tid": 494, "ts": 1742522672522558, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15480, "pid": 0, "tid": 7, "ts": 1742522672605412, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15480, "pid": 494, "tid": 494, "ts": 1742522672522572, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15490, "pid": 0, "tid": 7, "ts": 1742522672605416, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15490, "pid": 494, "tid": 494, "ts": 1742522672522583, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15511, "pid": 0, "tid": 7, "ts": 1742522672605420, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15511, "pid": 494, "tid": 494, "ts": 1742522672522606, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15524, "pid": 0, "tid": 7, "ts": 1742522672605424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15524, "pid": 494, "tid": 494, "ts": 1742522672522622, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15534, "pid": 0, "tid": 7, "ts": 1742522672605428, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15534, "pid": 494, "tid": 494, "ts": 1742522672522638, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15544, "pid": 0, "tid": 7, "ts": 1742522672605433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15544, "pid": 494, "tid": 494, "ts": 1742522672522648, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15562, "pid": 0, "tid": 7, "ts": 1742522672605436, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15562, "pid": 494, "tid": 494, "ts": 1742522672522661, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15572, "pid": 0, "tid": 7, "ts": 1742522672605439, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15572, "pid": 494, "tid": 494, "ts": 1742522672522672, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15582, "pid": 0, "tid": 7, "ts": 1742522672605444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15582, "pid": 494, "tid": 494, "ts": 1742522672522681, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15619, "pid": 0, "tid": 7, "ts": 1742522672605448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15619, "pid": 494, "tid": 494, "ts": 1742522672522723, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15631, "pid": 0, "tid": 7, "ts": 1742522672605452, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15631, "pid": 494, "tid": 494, "ts": 1742522672522741, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15637, "pid": 0, "tid": 7, "ts": 1742522672605457, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15637, "pid": 494, "tid": 494, "ts": 1742522672522757, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15655, "pid": 0, "tid": 7, "ts": 1742522672605461, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15655, "pid": 494, "tid": 494, "ts": 1742522672522799, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15676, "pid": 0, "tid": 7, "ts": 1742522672605470, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15676, "pid": 494, "tid": 494, "ts": 1742522672522831, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 15682, "pid": 0, "tid": 7, "ts": 1742522672605481, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 15682, "pid": 494, "tid": 494, "ts": 1742522672522848, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16623, "pid": 0, "tid": 7, "ts": 1742522672605487, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16623, "pid": 494, "tid": 494, "ts": 1742522672523270, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16637, "pid": 0, "tid": 7, "ts": 1742522672605490, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16637, "pid": 494, "tid": 494, "ts": 1742522672523294, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16648, "pid": 0, "tid": 7, "ts": 1742522672605493, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16648, "pid": 494, "tid": 494, "ts": 1742522672523308, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16665, "pid": 0, "tid": 7, "ts": 1742522672605497, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16665, "pid": 494, "tid": 494, "ts": 1742522672523329, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16671, "pid": 0, "tid": 7, "ts": 1742522672605500, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16671, "pid": 494, "tid": 494, "ts": 1742522672523338, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16680, "pid": 0, "tid": 7, "ts": 1742522672605505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16680, "pid": 494, "tid": 494, "ts": 1742522672523368, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16702, "pid": 0, "tid": 7, "ts": 1742522672605510, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16702, "pid": 494, "tid": 494, "ts": 1742522672523419, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16718, "pid": 0, "tid": 7, "ts": 1742522672605525, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16718, "pid": 494, "tid": 494, "ts": 1742522672523508, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16732, "pid": 0, "tid": 7, "ts": 1742522672605531, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16732, "pid": 494, "tid": 494, "ts": 1742522672523574, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16741, "pid": 0, "tid": 7, "ts": 1742522672605537, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16741, "pid": 494, "tid": 494, "ts": 1742522672523596, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16754, "pid": 494, "tid": 494, "ts": 1742522672523650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16755, "pid": 494, "tid": 494, "ts": 1742522672523663, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16756, "pid": 494, "tid": 494, "ts": 1742522672523665, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16757, "pid": 494, "tid": 494, "ts": 1742522672523666, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16758, "pid": 0, "tid": 7, "ts": 1742522672605547, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16758, "pid": 494, "tid": 494, "ts": 1742522672523666, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16783, "pid": 0, "tid": 7, "ts": 1742522672605642, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16783, "pid": 494, "tid": 494, "ts": 1742522672523780, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16795, "pid": 494, "tid": 494, "ts": 1742522672523950, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16797, "pid": 494, "tid": 494, "ts": 1742522672523956, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16799, "pid": 494, "tid": 494, "ts": 1742522672523956, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16801, "pid": 494, "tid": 494, "ts": 1742522672523957, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16803, "pid": 494, "tid": 494, "ts": 1742522672523958, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16804, "pid": 0, "tid": 7, "ts": 1742522672605647, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16804, "pid": 494, "tid": 494, "ts": 1742522672523962, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16813, "pid": 0, "tid": 7, "ts": 1742522672605662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16813, "pid": 494, "tid": 494, "ts": 1742522672524036, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16836, "pid": 0, "tid": 7, "ts": 1742522672605668, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16836, "pid": 494, "tid": 494, "ts": 1742522672524100, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16848, "pid": 494, "tid": 494, "ts": 1742522672524191, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16850, "pid": 494, "tid": 494, "ts": 1742522672524192, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16852, "pid": 494, "tid": 494, "ts": 1742522672524193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16854, "pid": 494, "tid": 494, "ts": 1742522672524193, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16856, "pid": 494, "tid": 494, "ts": 1742522672524194, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16857, "pid": 0, "tid": 7, "ts": 1742522672605670, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16857, "pid": 494, "tid": 494, "ts": 1742522672524198, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16869, "pid": 494, "tid": 494, "ts": 1742522672524277, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16870, "pid": 494, "tid": 494, "ts": 1742522672524281, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16871, "pid": 0, "tid": 7, "ts": 1742522672605699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16871, "pid": 494, "tid": 494, "ts": 1742522672524283, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16878, "pid": 0, "tid": 7, "ts": 1742522672605731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16878, "pid": 494, "tid": 494, "ts": 1742522672524315, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16899, "pid": 0, "tid": 7, "ts": 1742522672605743, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16899, "pid": 494, "tid": 494, "ts": 1742522672524371, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16934, "pid": 494, "tid": 494, "ts": 1742522672524409, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16935, "pid": 0, "tid": 7, "ts": 1742522672605758, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16935, "pid": 494, "tid": 494, "ts": 1742522672524411, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16937, "pid": 0, "tid": 7, "ts": 1742522672606224, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16937, "pid": 494, "tid": 494, "ts": 1742522672524416, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16949, "pid": 494, "tid": 494, "ts": 1742522672524459, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16950, "pid": 494, "tid": 494, "ts": 1742522672524462, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16951, "pid": 0, "tid": 7, "ts": 1742522672606251, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16951, "pid": 494, "tid": 494, "ts": 1742522672524463, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16968, "pid": 0, "tid": 7, "ts": 1742522672606280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16968, "pid": 494, "tid": 494, "ts": 1742522672524498, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 16980, "pid": 494, "tid": 494, "ts": 1742522672524591, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16982, "pid": 494, "tid": 494, "ts": 1742522672524593, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16984, "pid": 494, "tid": 494, "ts": 1742522672524594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16986, "pid": 494, "tid": 494, "ts": 1742522672524594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16988, "pid": 494, "tid": 494, "ts": 1742522672524595, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 16989, "pid": 0, "tid": 7, "ts": 1742522672606290, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 16989, "pid": 494, "tid": 494, "ts": 1742522672524598, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17011, "pid": 0, "tid": 7, "ts": 1742522672606346, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17011, "pid": 494, "tid": 494, "ts": 1742522672524692, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17021, "pid": 494, "tid": 494, "ts": 1742522672524749, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17022, "pid": 494, "tid": 494, "ts": 1742522672524752, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17023, "pid": 0, "tid": 7, "ts": 1742522672606354, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17023, "pid": 494, "tid": 494, "ts": 1742522672524754, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17025, "pid": 0, "tid": 7, "ts": 1742522672606365, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17025, "pid": 494, "tid": 494, "ts": 1742522672524762, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17034, "pid": 494, "tid": 494, "ts": 1742522672524793, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17036, "pid": 0, "tid": 7, "ts": 1742522672606370, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17036, "pid": 494, "tid": 494, "ts": 1742522672524797, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17061, "pid": 0, "tid": 7, "ts": 1742522672606376, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17061, "pid": 494, "tid": 494, "ts": 1742522672524850, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 17077, "pid": 494, "tid": 494, "ts": 1742522672524915, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17118, "pid": 0, "tid": 16, "ts": 1742522672606388, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17118, "pid": 494, "tid": 494, "ts": 1742522672524942, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 17127, "pid": 494, "tid": 494, "ts": 1742522672524992, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17171, "pid": 0, "tid": 7, "ts": 1742522672606592, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17171, "pid": 494, "tid": 494, "ts": 1742522672525025, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17181, "pid": 494, "tid": 494, "ts": 1742522672525143, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17183, "pid": 494, "tid": 494, "ts": 1742522672525144, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17185, "pid": 494, "tid": 494, "ts": 1742522672525145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17187, "pid": 494, "tid": 494, "ts": 1742522672525145, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17189, "pid": 494, "tid": 494, "ts": 1742522672525146, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17190, "pid": 0, "tid": 7, "ts": 1742522672606609, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17190, "pid": 494, "tid": 494, "ts": 1742522672525150, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17206, "pid": 0, "tid": 7, "ts": 1742522672606692, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17206, "pid": 494, "tid": 494, "ts": 1742522672525187, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17217, "pid": 494, "tid": 494, "ts": 1742522672525271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17219, "pid": 494, "tid": 494, "ts": 1742522672525273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17221, "pid": 494, "tid": 494, "ts": 1742522672525273, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17223, "pid": 494, "tid": 494, "ts": 1742522672525274, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17225, "pid": 494, "tid": 494, "ts": 1742522672525275, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17226, "pid": 0, "tid": 7, "ts": 1742522672606702, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17226, "pid": 494, "tid": 494, "ts": 1742522672525278, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17227, "pid": 0, "tid": 7, "ts": 1742522672606745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17227, "pid": 494, "tid": 494, "ts": 1742522672525290, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17236, "pid": 0, "tid": 7, "ts": 1742522672606787, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17236, "pid": 494, "tid": 494, "ts": 1742522672525308, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17252, "pid": 0, "tid": 7, "ts": 1742522672606813, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17252, "pid": 494, "tid": 494, "ts": 1742522672525346, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17264, "pid": 494, "tid": 494, "ts": 1742522672525431, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17266, "pid": 494, "tid": 494, "ts": 1742522672525432, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17268, "pid": 494, "tid": 494, "ts": 1742522672525433, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17270, "pid": 494, "tid": 494, "ts": 1742522672525434, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17272, "pid": 494, "tid": 494, "ts": 1742522672525434, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17273, "pid": 0, "tid": 7, "ts": 1742522672606819, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17273, "pid": 494, "tid": 494, "ts": 1742522672525438, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17289, "pid": 0, "tid": 7, "ts": 1742522672606842, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17289, "pid": 494, "tid": 494, "ts": 1742522672525469, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17301, "pid": 494, "tid": 494, "ts": 1742522672525555, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17303, "pid": 494, "tid": 494, "ts": 1742522672525556, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17305, "pid": 494, "tid": 494, "ts": 1742522672525557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17307, "pid": 494, "tid": 494, "ts": 1742522672525557, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17309, "pid": 494, "tid": 494, "ts": 1742522672525558, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17310, "pid": 0, "tid": 7, "ts": 1742522672606847, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17310, "pid": 494, "tid": 494, "ts": 1742522672525561, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17320, "pid": 494, "tid": 494, "ts": 1742522672525639, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17322, "pid": 494, "tid": 494, "ts": 1742522672525640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17324, "pid": 494, "tid": 494, "ts": 1742522672525640, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17326, "pid": 494, "tid": 494, "ts": 1742522672525641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17328, "pid": 494, "tid": 494, "ts": 1742522672525641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17329, "pid": 0, "tid": 7, "ts": 1742522672606859, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17329, "pid": 494, "tid": 494, "ts": 1742522672525643, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17345, "pid": 0, "tid": 7, "ts": 1742522672606939, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17345, "pid": 494, "tid": 494, "ts": 1742522672525666, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17356, "pid": 494, "tid": 494, "ts": 1742522672525731, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17358, "pid": 494, "tid": 494, "ts": 1742522672525732, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17360, "pid": 494, "tid": 494, "ts": 1742522672525733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17362, "pid": 494, "tid": 494, "ts": 1742522672525733, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17364, "pid": 494, "tid": 494, "ts": 1742522672525734, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17365, "pid": 0, "tid": 7, "ts": 1742522672606948, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17365, "pid": 494, "tid": 494, "ts": 1742522672525735, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17366, "pid": 0, "tid": 7, "ts": 1742522672606992, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17366, "pid": 494, "tid": 494, "ts": 1742522672525745, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 17375, "pid": 494, "tid": 494, "ts": 1742522672525758, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17388, "pid": 0, "tid": 16, "ts": 1742522672607159, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17388, "pid": 494, "tid": 494, "ts": 1742522672525768, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 17401, "pid": 494, "tid": 494, "ts": 1742522672525788, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17410, "pid": 0, "tid": 7, "ts": 1742522672607514, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17410, "pid": 494, "tid": 494, "ts": 1742522672525802, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17420, "pid": 0, "tid": 7, "ts": 1742522672607521, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17420, "pid": 494, "tid": 494, "ts": 1742522672525831, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17449, "pid": 0, "tid": 7, "ts": 1742522672607527, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17449, "pid": 494, "tid": 494, "ts": 1742522672525932, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17458, "pid": 0, "tid": 7, "ts": 1742522672607534, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17458, "pid": 494, "tid": 494, "ts": 1742522672525975, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17473, "pid": 494, "tid": 494, "ts": 1742522672526008, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17474, "pid": 0, "tid": 7, "ts": 1742522672607543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17474, "pid": 494, "tid": 494, "ts": 1742522672526018, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17475, "pid": 0, "tid": 7, "ts": 1742522672607546, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17475, "pid": 494, "tid": 494, "ts": 1742522672526024, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17476, "pid": 494, "tid": 494, "ts": 1742522672526028, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17477, "pid": 494, "tid": 494, "ts": 1742522672526029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17478, "pid": 494, "tid": 494, "ts": 1742522672526029, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17479, "pid": 0, "tid": 7, "ts": 1742522672607549, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17479, "pid": 494, "tid": 494, "ts": 1742522672526030, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17486, "pid": 0, "tid": 7, "ts": 1742522672608222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17486, "pid": 494, "tid": 494, "ts": 1742522672526199, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17505, "pid": 494, "tid": 494, "ts": 1742522672526222, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17506, "pid": 0, "tid": 7, "ts": 1742522672608289, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17506, "pid": 494, "tid": 494, "ts": 1742522672526224, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17517, "pid": 0, "tid": 7, "ts": 1742522672608444, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17517, "pid": 494, "tid": 494, "ts": 1742522672526244, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17535, "pid": 0, "tid": 7, "ts": 1742522672608448, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17535, "pid": 494, "tid": 494, "ts": 1742522672526266, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17541, "pid": 0, "tid": 7, "ts": 1742522672608451, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17541, "pid": 494, "tid": 494, "ts": 1742522672526277, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17547, "pid": 0, "tid": 7, "ts": 1742522672608456, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17547, "pid": 494, "tid": 494, "ts": 1742522672526284, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17552, "pid": 0, "tid": 7, "ts": 1742522672608460, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17552, "pid": 494, "tid": 494, "ts": 1742522672526292, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17558, "pid": 0, "tid": 7, "ts": 1742522672608485, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17558, "pid": 494, "tid": 494, "ts": 1742522672526303, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17576, "pid": 0, "tid": 7, "ts": 1742522672608496, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17576, "pid": 494, "tid": 494, "ts": 1742522672526319, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17589, "pid": 0, "tid": 7, "ts": 1742522672608498, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17589, "pid": 494, "tid": 494, "ts": 1742522672526345, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17599, "pid": 0, "tid": 7, "ts": 1742522672608511, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17599, "pid": 494, "tid": 494, "ts": 1742522672526359, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17610, "pid": 0, "tid": 7, "ts": 1742522672608518, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17610, "pid": 494, "tid": 494, "ts": 1742522672526377, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17620, "pid": 0, "tid": 7, "ts": 1742522672608525, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17620, "pid": 494, "tid": 494, "ts": 1742522672526388, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17626, "pid": 0, "tid": 7, "ts": 1742522672608528, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17626, "pid": 494, "tid": 494, "ts": 1742522672526403, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17646, "pid": 494, "tid": 494, "ts": 1742522672526424, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17648, "pid": 0, "tid": 7, "ts": 1742522672608532, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17648, "pid": 494, "tid": 494, "ts": 1742522672526426, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17654, "pid": 0, "tid": 7, "ts": 1742522672608538, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17654, "pid": 494, "tid": 494, "ts": 1742522672526437, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17668, "pid": 0, "tid": 7, "ts": 1742522672608543, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17668, "pid": 494, "tid": 494, "ts": 1742522672526453, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17681, "pid": 0, "tid": 7, "ts": 1742522672608561, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17681, "pid": 494, "tid": 494, "ts": 1742522672526471, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17687, "pid": 0, "tid": 7, "ts": 1742522672608564, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17687, "pid": 494, "tid": 494, "ts": 1742522672526487, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17695, "pid": 0, "tid": 7, "ts": 1742522672608567, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17695, "pid": 494, "tid": 494, "ts": 1742522672526500, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17703, "pid": 0, "tid": 7, "ts": 1742522672608572, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17703, "pid": 494, "tid": 494, "ts": 1742522672526511, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17713, "pid": 0, "tid": 7, "ts": 1742522672608576, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17713, "pid": 494, "tid": 494, "ts": 1742522672526530, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17725, "pid": 0, "tid": 7, "ts": 1742522672608579, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17725, "pid": 494, "tid": 494, "ts": 1742522672526548, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17731, "pid": 494, "tid": 494, "ts": 1742522672526565, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17735, "pid": 0, "tid": 7, "ts": 1742522672608588, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17735, "pid": 494, "tid": 494, "ts": 1742522672526570, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17738, "pid": 494, "tid": 494, "ts": 1742522672526582, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17742, "pid": 0, "tid": 7, "ts": 1742522672608592, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17742, "pid": 494, "tid": 494, "ts": 1742522672526585, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17745, "pid": 494, "tid": 494, "ts": 1742522672526594, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17749, "pid": 0, "tid": 7, "ts": 1742522672608597, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17749, "pid": 494, "tid": 494, "ts": 1742522672526596, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17752, "pid": 494, "tid": 494, "ts": 1742522672526604, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17756, "pid": 0, "tid": 7, "ts": 1742522672608601, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17756, "pid": 494, "tid": 494, "ts": 1742522672526607, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17759, "pid": 494, "tid": 494, "ts": 1742522672526614, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17763, "pid": 0, "tid": 7, "ts": 1742522672608606, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17763, "pid": 494, "tid": 494, "ts": 1742522672526616, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17776, "pid": 0, "tid": 7, "ts": 1742522672608610, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17776, "pid": 494, "tid": 494, "ts": 1742522672526632, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17780, "pid": 494, "tid": 494, "ts": 1742522672526641, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17784, "pid": 0, "tid": 7, "ts": 1742522672608621, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17784, "pid": 494, "tid": 494, "ts": 1742522672526644, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17787, "pid": 494, "tid": 494, "ts": 1742522672526652, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17791, "pid": 0, "tid": 7, "ts": 1742522672608625, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17791, "pid": 494, "tid": 494, "ts": 1742522672526655, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17794, "pid": 494, "tid": 494, "ts": 1742522672526662, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17798, "pid": 0, "tid": 7, "ts": 1742522672608629, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17798, "pid": 494, "tid": 494, "ts": 1742522672526665, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17801, "pid": 494, "tid": 494, "ts": 1742522672526671, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17805, "pid": 0, "tid": 7, "ts": 1742522672608634, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17805, "pid": 494, "tid": 494, "ts": 1742522672526674, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17817, "pid": 0, "tid": 7, "ts": 1742522672608638, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17817, "pid": 494, "tid": 494, "ts": 1742522672526687, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 17822, "pid": 494, "tid": 494, "ts": 1742522672526696, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 17826, "pid": 0, "tid": 7, "ts": 1742522672608645, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 17826, "pid": 494, "tid": 494, "ts": 1742522672526699, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 18739, "pid": 0, "tid": 7, "ts": 1742522672608650, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 18739, "pid": 494, "tid": 494, "ts": 1742522672527138, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 18743, "pid": 0, "tid": 7, "ts": 1742522672608652, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 18743, "pid": 494, "tid": 494, "ts": 1742522672527148, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19657, "pid": 0, "tid": 7, "ts": 1742522672608656, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19657, "pid": 494, "tid": 494, "ts": 1742522672527578, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19661, "pid": 0, "tid": 7, "ts": 1742522672608659, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19661, "pid": 494, "tid": 494, "ts": 1742522672527586, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19665, "pid": 494, "tid": 494, "ts": 1742522672527607, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 19669, "pid": 0, "tid": 7, "ts": 1742522672608672, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19669, "pid": 494, "tid": 494, "ts": 1742522672527611, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19686, "pid": 0, "tid": 7, "ts": 1742522672608676, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19686, "pid": 494, "tid": 494, "ts": 1742522672527644, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19695, "pid": 0, "tid": 7, "ts": 1742522672608680, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19695, "pid": 494, "tid": 494, "ts": 1742522672527658, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19702, "pid": 0, "tid": 7, "ts": 1742522672608685, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19702, "pid": 494, "tid": 494, "ts": 1742522672527700, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19719, "pid": 0, "tid": 7, "ts": 1742522672608688, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19719, "pid": 494, "tid": 494, "ts": 1742522672527721, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19733, "pid": 0, "tid": 7, "ts": 1742522672608693, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19733, "pid": 494, "tid": 494, "ts": 1742522672527741, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19744, "pid": 0, "tid": 7, "ts": 1742522672608695, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19744, "pid": 494, "tid": 494, "ts": 1742522672527754, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19751, "pid": 494, "tid": 494, "ts": 1742522672527791, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19757, "pid": 494, "tid": 494, "ts": 1742522672527801, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19761, "pid": 494, "tid": 494, "ts": 1742522672527817, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19766, "pid": 494, "tid": 494, "ts": 1742522672527830, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19768, "pid": 494, "tid": 494, "ts": 1742522672527832, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 19769, "pid": 0, "tid": 13, "ts": 1742522672608699, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19772, "pid": 494, "tid": 494, "ts": 1742522672527873, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 19791, "pid": 494, "tid": 494, "ts": 1742522672527891, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 19798, "pid": 494, "tid": 494, "ts": 1742522672527911, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19814, "pid": 0, "tid": 7, "ts": 1742522672608854, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19814, "pid": 494, "tid": 494, "ts": 1742522672527938, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19828, "pid": 0, "tid": 7, "ts": 1742522672608858, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19828, "pid": 494, "tid": 494, "ts": 1742522672527969, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19842, "pid": 0, "tid": 7, "ts": 1742522672608868, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19842, "pid": 494, "tid": 494, "ts": 1742522672527986, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19846, "pid": 494, "tid": 494, "ts": 1742522672527997, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 19850, "pid": 0, "tid": 7, "ts": 1742522672608877, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19850, "pid": 494, "tid": 494, "ts": 1742522672528002, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19862, "pid": 0, "tid": 7, "ts": 1742522672608885, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19862, "pid": 494, "tid": 494, "ts": 1742522672528020, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19880, "pid": 0, "tid": 7, "ts": 1742522672608889, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19880, "pid": 494, "tid": 494, "ts": 1742522672528046, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19894, "pid": 0, "tid": 7, "ts": 1742522672608908, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19894, "pid": 494, "tid": 494, "ts": 1742522672528061, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19898, "pid": 494, "tid": 494, "ts": 1742522672528069, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 19902, "pid": 0, "tid": 7, "ts": 1742522672608918, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19902, "pid": 494, "tid": 494, "ts": 1742522672528072, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 19913, "pid": 494, "tid": 494, "ts": 1742522672528127, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19979, "pid": 494, "tid": 494, "ts": 1742522672610505, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 19986, "pid": 0, "tid": 7, "ts": 1742522672610573, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19986, "pid": 494, "tid": 494, "ts": 1742522672610544, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 19987, "pid": 494, "tid": 494, "ts": 1742522672610574, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 19991, "pid": 494, "tid": 494, "ts": 1742522672610682, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 19997, "pid": 494, "tid": 494, "ts": 1742522672610706, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 20001, "pid": 494, "tid": 494, "ts": 1742522672610745, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 20006, "pid": 494, "tid": 494, "ts": 1742522672610765, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 20008, "pid": 494, "tid": 494, "ts": 1742522672610768, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 20009, "pid": 0, "tid": 13, "ts": 1742522672610838, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 20012, "pid": 494, "tid": 494, "ts": 1742522672610849, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 20031, "pid": 494, "tid": 494, "ts": 1742522672610883, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 20038, "pid": 494, "tid": 494, "ts": 1742522672610930, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 20047, "pid": 0, "tid": 7, "ts": 1742522672611128, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 20047, "pid": 494, "tid": 494, "ts": 1742522672610954, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 20048, "pid": 494, "tid": 494, "ts": 1742522672611135, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 20059, "pid": 494, "tid": 494, "ts": 1742522672611188, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 20066, "pid": 0, "tid": 7, "ts": 1742522672611245, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 20066, "pid": 494, "tid": 494, "ts": 1742522672611211, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 20071, "pid": 494, "tid": 494, "ts": 1742522672611256, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 20077, "pid": 494, "tid": 494, "ts": 1742522672611263, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 20081, "pid": 494, "tid": 494, "ts": 1742522672611271, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 20086, "pid": 494, "tid": 494, "ts": 1742522672611278, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 20088, "pid": 494, "tid": 494, "ts": 1742522672611280, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "f", "id": 20089, "pid": 0, "tid": 13, "ts": 1742522672611311, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 20092, "pid": 494, "tid": 494, "ts": 1742522672611314, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 20111, "pid": 494, "tid": 494, "ts": 1742522672611328, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 20118, "pid": 494, "tid": 494, "ts": 1742522672611343, "cat": "ac2g", "name": "ac2g"}, {"ph": "f", "id": 20127, "pid": 0, "tid": 7, "ts": 1742522672611445, "cat": "ac2g", "name": "ac2g", "bp": "e"}, {"ph": "s", "id": 20127, "pid": 494, "tid": 494, "ts": 1742522672611352, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 20128, "pid": 494, "tid": 494, "ts": 1742522672611452, "cat": "ac2g", "name": "ac2g"}, {"ph": "s", "id": 20138, "pid": 494, "tid": 494, "ts": 1742522672611693, "cat": "ac2g", "name": "ac2g"}, {"name": "process_name", "ph": "M", "ts": 1742522672306396, "pid": 494, "tid": 0, "args": {"name": "python3"}}, {"name": "process_labels", "ph": "M", "ts": 1742522672306396, "pid": 494, "tid": 0, "args": {"labels": "CPU"}}, {"name": "process_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 494, "tid": 0, "args": {"sort_index": 494}}, {"name": "process_name", "ph": "M", "ts": 1742522672306396, "pid": 0, "tid": 0, "args": {"name": "python3"}}, {"name": "process_labels", "ph": "M", "ts": 1742522672306396, "pid": 0, "tid": 0, "args": {"labels": "GPU 0"}}, {"name": "process_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 0, "tid": 0, "args": {"sort_index": 16777216}}, {"name": "process_name", "ph": "M", "ts": 1742522672306396, "pid": 1, "tid": 0, "args": {"name": "python3"}}, {"name": "process_labels", "ph": "M", "ts": 1742522672306396, "pid": 1, "tid": 0, "args": {"labels": "GPU 1"}}, {"name": "process_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 1, "tid": 0, "args": {"sort_index": 16777217}}, {"name": "process_name", "ph": "M", "ts": 1742522672306396, "pid": 2, "tid": 0, "args": {"name": "python3"}}, {"name": "process_labels", "ph": "M", "ts": 1742522672306396, "pid": 2, "tid": 0, "args": {"labels": "GPU 2"}}, {"name": "process_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 2, "tid": 0, "args": {"sort_index": 16777218}}, {"name": "process_name", "ph": "M", "ts": 1742522672306396, "pid": 3, "tid": 0, "args": {"name": "python3"}}, {"name": "process_labels", "ph": "M", "ts": 1742522672306396, "pid": 3, "tid": 0, "args": {"labels": "GPU 3"}}, {"name": "process_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 3, "tid": 0, "args": {"sort_index": 16777219}}, {"name": "process_name", "ph": "M", "ts": 1742522672306396, "pid": 4, "tid": 0, "args": {"name": "python3"}}, {"name": "process_labels", "ph": "M", "ts": 1742522672306396, "pid": 4, "tid": 0, "args": {"labels": "GPU 4"}}, {"name": "process_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 4, "tid": 0, "args": {"sort_index": 16777220}}, {"name": "process_name", "ph": "M", "ts": 1742522672306396, "pid": 5, "tid": 0, "args": {"name": "python3"}}, {"name": "process_labels", "ph": "M", "ts": 1742522672306396, "pid": 5, "tid": 0, "args": {"labels": "GPU 5"}}, {"name": "process_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 5, "tid": 0, "args": {"sort_index": 16777221}}, {"name": "process_name", "ph": "M", "ts": 1742522672306396, "pid": 6, "tid": 0, "args": {"name": "python3"}}, {"name": "process_labels", "ph": "M", "ts": 1742522672306396, "pid": 6, "tid": 0, "args": {"labels": "GPU 6"}}, {"name": "process_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 6, "tid": 0, "args": {"sort_index": 16777222}}, {"name": "process_name", "ph": "M", "ts": 1742522672306396, "pid": 7, "tid": 0, "args": {"name": "python3"}}, {"name": "process_labels", "ph": "M", "ts": 1742522672306396, "pid": 7, "tid": 0, "args": {"labels": "GPU 7"}}, {"name": "process_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 7, "tid": 0, "args": {"sort_index": 16777223}}, {"name": "thread_name", "ph": "M", "ts": 1742522672306396, "pid": 0, "tid": 7, "args": {"name": "stream 7 "}}, {"name": "thread_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 0, "tid": 7, "args": {"sort_index": 7}}, {"name": "thread_name", "ph": "M", "ts": 1742522672306396, "pid": 0, "tid": 13, "args": {"name": "stream 13 "}}, {"name": "thread_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 0, "tid": 13, "args": {"sort_index": 13}}, {"name": "thread_name", "ph": "M", "ts": 1742522672306396, "pid": 0, "tid": 16, "args": {"name": "stream 16 "}}, {"name": "thread_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 0, "tid": 16, "args": {"sort_index": 16}}, {"name": "thread_name", "ph": "M", "ts": 1742522672306396, "pid": 494, "tid": 494, "args": {"name": "thread 494 (python3)"}}, {"name": "thread_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 494, "tid": 494, "args": {"sort_index": 494}}, {"name": "thread_name", "ph": "M", "ts": 1742522672306396, "pid": 494, "tid": 494, "args": {"name": "thread 494 (python3)"}}, {"name": "thread_sort_index", "ph": "M", "ts": 1742522672306396, "pid": 494, "tid": 494, "args": {"sort_index": 494}}, {"name": "process_sort_index", "ph": "M", "ts": 1742522672306275, "pid": "Spans", "tid": 0, "args": {"sort_index": 536870912}}, {"name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 1742522672306275}, {"name": "Record Window End", "ph": "i", "s": "g", "pid": "", "tid": "", "ts": 1742522672615142}], "traceName": "./decode-debug-rank0.json"}