Merge pull request #80 from abcdabcd987/fix-link-error

Fix linking error from ODR violation
This commit is contained in:
Chenggang Zhao 2025-04-07 09:31:58 +08:00 committed by GitHub
commit 3ea3cb203c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 4 additions and 4 deletions

View File

@ -888,15 +888,15 @@ struct SM90_U32x4_STSM_N {
} }
}; };
// Emits the PTX instruction `wgmma.fence.sync.aligned`.
//
// The "memory" clobber tells the compiler that the statement may read or write
// arbitrary memory, so it will not reorder memory accesses across it.
//
// Declared `__forceinline__` so that this header-defined function can be
// included from several translation units without a multiple-definition
// (ODR) link error.
//
// NOTE(review): `wgmma.*` instructions presumably require an SM90-class
// target -- confirm against the build flags.
__forceinline__ __device__ void warpgroup_arrive() {
    asm volatile("wgmma.fence.sync.aligned;\n" ::: "memory");
}
// Emits the PTX instruction `wgmma.commit_group.sync.aligned`.
//
// The "memory" clobber tells the compiler that the statement may read or write
// arbitrary memory, so it will not reorder memory accesses across it.
//
// Declared `__forceinline__` so that this header-defined function can be
// included from several translation units without a multiple-definition
// (ODR) link error.
//
// NOTE(review): `wgmma.*` instructions presumably require an SM90-class
// target -- confirm against the build flags.
__forceinline__ __device__ void warpgroup_commit_batch() {
    asm volatile("wgmma.commit_group.sync.aligned;\n" ::: "memory");
}
// Compiler-level fence on a single float operand.
//
// The asm body is empty, so no instruction is emitted. Listing `reg` as a
// read-write ("+f") operand, together with the "memory" clobber, makes the
// compiler assume that `reg` is both read and modified here, so it cannot
// move or eliminate accesses to `reg` (or to memory) across this statement.
//
// Declared `__forceinline__` so that this header-defined function can be
// included from several translation units without a multiple-definition
// (ODR) link error.
//
// Parameters:
//   reg - float value to pin at this point; passed by reference so the
//         constraint applies to the caller's variable.
__forceinline__ __device__ void warpgroup_fence_operand(float& reg) {
    asm volatile("" : "+f"(reg) :: "memory");
}

View File

@ -40,7 +40,7 @@ constexpr CUtensorMapDataType get_CUtensorMapDataType() {
} }
} }
PFN_cuTensorMapEncodeTiled get_cuTensorMapEncodeTiled() { inline PFN_cuTensorMapEncodeTiled get_cuTensorMapEncodeTiled() {
// Get pointer to `cuTensorMapEncodeTiled` // Get pointer to `cuTensorMapEncodeTiled`
cudaDriverEntryPointQueryResult driver_status; cudaDriverEntryPointQueryResult driver_status;
void* cuTensorMapEncodeTiled_ptr = nullptr; void* cuTensorMapEncodeTiled_ptr = nullptr;