Make TORCH_CUDA_ARCH_LIST configurable via an environment variable (#167)

* Add 10.0 to TORCH_CUDA_ARCH_LIST

Signed-off-by: Guyue Huang <guyueh@nvidia.com>

* Revert the csrc/CMakeLists change; make TORCH_CUDA_ARCH_LIST configurable in setup.py

Signed-off-by: Guyue Huang <guyueh@nvidia.com>

---------

Signed-off-by: Guyue Huang <guyueh@nvidia.com>
This commit is contained in:
guyueh1 2025-05-18 18:43:48 -07:00 committed by GitHub
parent bb393e7760
commit d5ca4495c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -10,7 +10,8 @@ if __name__ == '__main__':
print(f'NVSHMEM directory: {nvshmem_dir}')
# TODO: currently, we only support Hopper architecture, we may add Ampere support later
os.environ['TORCH_CUDA_ARCH_LIST'] = '9.0'
if os.getenv('TORCH_CUDA_ARCH_LIST', None) is None:
os.environ['TORCH_CUDA_ARCH_LIST'] = '9.0'
cxx_flags = ['-O3', '-Wno-deprecated-declarations', '-Wno-unused-variable',
'-Wno-sign-compare', '-Wno-reorder', '-Wno-attributes']
nvcc_flags = ['-O3', '-Xcompiler', '-O3', '-rdc=true', '--ptxas-options=--register-usage-level=10']