Fix compatibility with PTX ISA versions below 8.6 (#194)

This commit is contained in:
Chenggang Zhao
2025-06-09 10:48:42 +08:00
committed by GitHub
parent 11a0b0e1a3
commit 564e375234

View File

@@ -309,7 +309,7 @@ __device__ __forceinline__ void tma_load_1d(const void* smem_ptr, const void* gm
auto mbar_int_ptr = static_cast<uint32_t>(__cvta_generic_to_shared(mbar_ptr));
auto smem_int_ptr = static_cast<uint32_t>(__cvta_generic_to_shared(smem_ptr));
const auto cache_hint = evict_first ? kEvictFirst : kEvictNormal;
- asm volatile("cp.async.bulk.shared::cta.global.mbarrier::complete_tx::bytes.L2::cache_hint [%0], [%1], %2, [%3], %4;\n"
+ asm volatile("cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes.L2::cache_hint [%0], [%1], %2, [%3], %4;\n"
:: "r"(smem_int_ptr), "l"(gmem_ptr), "r"(num_bytes), "r"(mbar_int_ptr), "l"(cache_hint) : "memory");
}