diff --git a/csrc/kernels/internode_ll.cu b/csrc/kernels/internode_ll.cu index b162492..db15bf5 100644 --- a/csrc/kernels/internode_ll.cu +++ b/csrc/kernels/internode_ll.cu @@ -498,7 +498,7 @@ combine(void* combined_x, } cg::this_grid().sync(); - // Reduce tokens with FP8 cast + // Reduce tokens EP_DEVICE_ASSERT(num_topk <= 32 and hidden_bf16_int4 <= num_threads); EP_STATIC_ASSERT(kHidden % (32 * kNumElemsPerInt4) == 0, "Invalid vectorization"); if (thread_id < hidden_bf16_int4) {