mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Fix < PTX ISA 8.6 compatibility (#194)
This commit is contained in:
@@ -309,7 +309,7 @@ __device__ __forceinline__ void tma_load_1d(const void* smem_ptr, const void* gm
|
||||
auto mbar_int_ptr = static_cast<uint32_t>(__cvta_generic_to_shared(mbar_ptr));
|
||||
auto smem_int_ptr = static_cast<uint32_t>(__cvta_generic_to_shared(smem_ptr));
|
||||
const auto cache_hint = evict_first ? kEvictFirst : kEvictNormal;
|
||||
- asm volatile("cp.async.bulk.shared::cta.global.mbarrier::complete_tx::bytes.L2::cache_hint [%0], [%1], %2, [%3], %4;\n"
|
||||
+ asm volatile("cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes.L2::cache_hint [%0], [%1], %2, [%3], %4;\n"
|
||||
:: "r"(smem_int_ptr), "l"(gmem_ptr), "r"(num_bytes), "r"(mbar_int_ptr), "l"(cache_hint) : "memory");
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user