mirror of
https://github.com/deepseek-ai/DeepGEMM
synced 2025-05-05 23:24:21 +00:00
Remove useless STSM
This commit is contained in:
parent
8041ed7164
commit
99eb6ec563
@ -867,16 +867,6 @@ struct SM90_64x192x32_F32E4M3E4M3_SS {
|
|||||||
static constexpr int kNumAccum = M * N / 128;
|
static constexpr int kNumAccum = M * N / 128;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Thin wrapper around the PTX instruction
// `stmatrix.sync.aligned.x1.m8n8.shared.b16`: stores one 8x8 matrix of 16-bit
// elements into shared memory, with each calling thread supplying one 32-bit
// register (i.e. two packed 16-bit values).
//
// Preconditions:
//   - `dtype_t` must be exactly 32 bits wide (enforced below), since the value
//     is reinterpreted as a single 32-bit register operand.
//   - `smem_dst` must point into shared memory; it is converted from a generic
//     pointer to a shared-window address before being handed to the instruction.
//   - The instruction carries `.sync.aligned`, so it is warp-collective and must
//     not be issued from divergent code.
//   - NOTE(review): `stmatrix` needs an sm_90-class target per the PTX ISA;
//     confirm the build flags of whoever includes this header.
template <typename dtype_t>
struct SM90_U32x1_STSM_N {
    __device__ __forceinline__ static void
    copy(dtype_t src_0, void* smem_dst) {
        // Reading `src_0` through a uint32_t pointer is only meaningful when the
        // two types have the same size; otherwise the load would be out of bounds
        // or would silently drop data.
        static_assert(sizeof(dtype_t) == sizeof(uint32_t),
                      "SM90_U32x1_STSM_N expects a 32-bit payload type");
        const uint32_t packed = *reinterpret_cast<uint32_t*>(&src_0);

        // `.shared` instructions take an address inside the shared state space,
        // not a generic 64-bit pointer, so convert explicitly and pass it as a
        // 32-bit register operand.
        const uint32_t smem_addr = static_cast<uint32_t>(__cvta_generic_to_shared(smem_dst));

        // The "memory" clobber tells the compiler that this statement writes
        // memory, so surrounding shared-memory accesses are not reordered across
        // it or served from stale register copies.
        asm volatile("stmatrix.sync.aligned.x1.m8n8.shared.b16 [%0], {%1};\n"
                     :: "r"(smem_addr), "r"(packed)
                     : "memory");
    }
};
|
|
||||||
|
|
||||||
template <typename dtype_t>
|
template <typename dtype_t>
|
||||||
struct SM90_U32x2_STSM_N {
|
struct SM90_U32x2_STSM_N {
|
||||||
__device__ __forceinline__ static void
|
__device__ __forceinline__ static void
|
||||||
|
Loading…
Reference in New Issue
Block a user