mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Make TORCH_CUDA_ARCH_LIST configurable via an environment variable (#167)
* Add 10.0 to TORCH_CUDA_ARCH_LIST

  Signed-off-by: Guyue Huang <guyueh@nvidia.com>

* Revert csrc/CMakeLists change; in setup.py make TORCH_CUDA_ARCH_LIST configurable

  Signed-off-by: Guyue Huang <guyueh@nvidia.com>

---------

Signed-off-by: Guyue Huang <guyueh@nvidia.com>
This commit is contained in:
parent
bb393e7760
commit
d5ca4495c0
3
setup.py
3
setup.py
@@ -10,7 +10,8 @@ if __name__ == '__main__':
|
||||
print(f'NVSHMEM directory: {nvshmem_dir}')
|
||||
|
||||
# TODO: currently, we only support Hopper architecture, we may add Ampere support later
|
||||
os.environ['TORCH_CUDA_ARCH_LIST'] = '9.0'
|
||||
if os.getenv('TORCH_CUDA_ARCH_LIST', None) is None:
|
||||
os.environ['TORCH_CUDA_ARCH_LIST'] = '9.0'
|
||||
cxx_flags = ['-O3', '-Wno-deprecated-declarations', '-Wno-unused-variable',
|
||||
'-Wno-sign-compare', '-Wno-reorder', '-Wno-attributes']
|
||||
nvcc_flags = ['-O3', '-Xcompiler', '-O3', '-rdc=true', '--ptxas-options=--register-usage-level=10']
|
||||
|
Loading…
Reference in New Issue
Block a user