mirror of
https://github.com/deepseek-ai/DeepGEMM
synced 2025-05-05 06:04:23 +00:00
Fix linking error from ODR violation
This commit is contained in:
parent
c187c23ba8
commit
611e3f659d
@ -888,15 +888,15 @@ struct SM90_U32x4_STSM_N {
|
||||
}
|
||||
};
|
||||
|
||||
__device__ void warpgroup_arrive() {
|
||||
__forceinline__ __device__ void warpgroup_arrive() {
|
||||
asm volatile("wgmma.fence.sync.aligned;\n" ::: "memory");
|
||||
}
|
||||
|
||||
__device__ void warpgroup_commit_batch() {
|
||||
__forceinline__ __device__ void warpgroup_commit_batch() {
|
||||
asm volatile("wgmma.commit_group.sync.aligned;\n" ::: "memory");
|
||||
}
|
||||
|
||||
__device__ void warpgroup_fence_operand(float& reg) {
|
||||
__forceinline__ __device__ void warpgroup_fence_operand(float& reg) {
|
||||
asm volatile("" : "+f"(reg) :: "memory");
|
||||
}
|
||||
|
||||
|
@ -40,7 +40,7 @@ constexpr CUtensorMapDataType get_CUtensorMapDataType() {
|
||||
}
|
||||
}
|
||||
|
||||
PFN_cuTensorMapEncodeTiled get_cuTensorMapEncodeTiled() {
|
||||
inline PFN_cuTensorMapEncodeTiled get_cuTensorMapEncodeTiled() {
|
||||
// Get pointer to `cuTensorMapEncodeTiled`
|
||||
cudaDriverEntryPointQueryResult driver_status;
|
||||
void* cuTensorMapEncodeTiled_ptr = nullptr;
|
||||
|
Loading…
Reference in New Issue
Block a user