Fix linking error from ODR violation

This commit is contained in:
Lequn Chen 2025-04-05 17:35:04 +00:00
parent c187c23ba8
commit 611e3f659d
2 changed files with 4 additions and 4 deletions

View File

@@ -888,15 +888,15 @@ struct SM90_U32x4_STSM_N {
}
};
-__device__ void warpgroup_arrive() {
+__forceinline__ __device__ void warpgroup_arrive() {
asm volatile("wgmma.fence.sync.aligned;\n" ::: "memory");
}
-__device__ void warpgroup_commit_batch() {
+__forceinline__ __device__ void warpgroup_commit_batch() {
asm volatile("wgmma.commit_group.sync.aligned;\n" ::: "memory");
}
-__device__ void warpgroup_fence_operand(float& reg) {
+__forceinline__ __device__ void warpgroup_fence_operand(float& reg) {
asm volatile("" : "+f"(reg) :: "memory");
}

View File

@@ -40,7 +40,7 @@ constexpr CUtensorMapDataType get_CUtensorMapDataType() {
}
}
-PFN_cuTensorMapEncodeTiled get_cuTensorMapEncodeTiled() {
+inline PFN_cuTensorMapEncodeTiled get_cuTensorMapEncodeTiled() {
// Get pointer to `cuTensorMapEncodeTiled`
cudaDriverEntryPointQueryResult driver_status;
void* cuTensorMapEncodeTiled_ptr = nullptr;