Remove useless STSM

This commit is contained in:
Chenggang Zhao 2025-04-11 10:45:36 +08:00
parent 8041ed7164
commit 99eb6ec563

View File

@@ -867,16 +867,6 @@ struct SM90_64x192x32_F32E4M3E4M3_SS {
static constexpr int kNumAccum = M * N / 128;
};
/// Thin wrapper around the PTX `stmatrix.sync.aligned.x1.m8n8.shared.b16`
/// instruction: stores one 32-bit register per thread to shared memory.
///
/// The instruction carries the `.sync.aligned` qualifiers, so `copy` is a
/// warp-collective operation and must be reached by every thread of the warp.
/// NOTE(review): `stmatrix` needs an SM90-class target, as the struct name
/// suggests — confirm the build's `-arch` setting.
///
/// `dtype_t` is the caller-side register type; only its first 32 bits are
/// stored (presumably a packed pair of 16-bit values — verify against callers).
template <typename dtype_t>
struct SM90_U32x1_STSM_N {
    /// Issue the store for the calling thread.
    ///
    /// @param src_0     value whose first 32 bits form this thread's source register
    /// @param smem_dst  generic pointer into shared memory used as this thread's
    ///                  address operand for the instruction
    __device__ __forceinline__ static void
    copy(dtype_t src_0, void* smem_dst) {
        // The reinterpretation below reads 4 bytes from `src_0`; reject types
        // that are too small instead of silently reading past the object.
        static_assert(sizeof(dtype_t) >= sizeof(uint32_t),
                      "dtype_t must provide at least 32 bits of payload");

        const uint32_t src[1] = {*reinterpret_cast<uint32_t*>(&src_0)};

        // `.shared` requires an address in the shared state space, not a generic
        // one, so convert the generic pointer explicitly before handing it to PTX.
        asm volatile("stmatrix.sync.aligned.x1.m8n8.shared.b16 [%0], {%1};\n"
                     :: "l"(__cvta_generic_to_shared(smem_dst)), "r"(src[0]));
    }
};
template <typename dtype_t>
struct SM90_U32x2_STSM_N {
__device__ __forceinline__ static void