diff --git a/deep_gemm/include/deep_gemm/scheduler.cuh b/deep_gemm/include/deep_gemm/scheduler.cuh index 81bfeba..69ea216 100644 --- a/deep_gemm/include/deep_gemm/scheduler.cuh +++ b/deep_gemm/include/deep_gemm/scheduler.cuh @@ -116,7 +116,7 @@ struct Scheduler { if constexpr (kGemmType == GemmType::Normal) { return block_idx * block_size; } else if constexpr (kGemmType == GemmType::GroupedContiguous) { - auto offset = kIgnoreGroupedForGroupedContiguous ? 0 : __ldg(grouped_layout + m_block_idx * BLOCK_M); + auto offset = kIgnoreGroupedForGroupedContiguous ? 0 : max(0, __ldg(grouped_layout + m_block_idx * BLOCK_M)); return offset * shape_dim + block_idx * block_size; } else if constexpr (kGemmType == GemmType::GroupedMasked) { return curr_group_idx * shape_dim + block_idx * block_size;