mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Update internode_ll.cu (#246)
This commit is contained in:
parent
c95997f8c4
commit
fbcf430006
@ -498,7 +498,7 @@ combine(void* combined_x,
|
||||
}
|
||||
cg::this_grid().sync();
|
||||
|
||||
// Reduce tokens with FP8 cast
|
||||
// Reduce tokens
|
||||
EP_DEVICE_ASSERT(num_topk <= 32 and hidden_bf16_int4 <= num_threads);
|
||||
EP_STATIC_ASSERT(kHidden % (32 * kNumElemsPerInt4) == 0, "Invalid vectorization");
|
||||
if (thread_id < hidden_bf16_int4) {
|
||||
|
||||
Loading…
Reference in New Issue
Block a user