Solve STSM bank conflict via padding and 3D TMA

This commit is contained in:
Chenggang Zhao
2025-04-03 15:39:35 +08:00
parent c57699ac93
commit 6db7e1863b
6 changed files with 119 additions and 51 deletions

View File

@@ -96,7 +96,7 @@ def put(path, data, is_binary=False):
def build(name: str, arg_defs: tuple, code: str) -> Runtime:
# Compiler flags
-nvcc_flags = ['-std=c++20', '-shared', '-O3', '--expt-relaxed-constexpr', '--expt-extended-lambda',
+nvcc_flags = ['-std=c++17', '-shared', '-O3', '--expt-relaxed-constexpr', '--expt-extended-lambda',
'-gencode=arch=compute_90a,code=sm_90a',
'--ptxas-options=--register-usage-level=10' + (',--verbose' if 'DG_PTXAS_VERBOSE' in os.environ else ''),
# Suppress some unnecessary warnings, such as unused variables for certain `constexpr` branch cases