Add DG_NVCC_OVERRIDE_CPP_STANDARD

2025-06-26 23:15:49 +00:00 · 2025-04-03 15:53:29 +08:00 · 2025-04-03 15:53:29 +08:00 · d14962f072
commit d14962f072
parent 3a5539b7db
2 changed files with 3 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -128,6 +128,7 @@ The library also provides some environment variables, which may be useful:

 - `DG_CACHE_DIR`: string, the cache directory to store compiled kernels, `$HOME/.deep_gemm` by default
 - `DG_NVCC_COMPILER`: string, path to the NVCC compiler to use; by default it is located via `torch.utils.cpp_extension.CUDA_HOME`
+- `DG_NVCC_OVERRIDE_CPP_STANDARD`: integer (e.g., `20`), overrides the C++ standard passed to NVCC as `-std=c++<N>` (`20` by default); useful for supporting some older GCC versions
 - `DG_DISABLE_FFMA_INTERLEAVE`: 0 or 1, disable FFMA-interleaving optimization
 - `DG_PTXAS_VERBOSE`: 0 or 1, show detailed PTXAS compiler output
 - `DG_PRINT_REG_REUSE`: 0 or 1, print FFMA-interleaving details
--- a/deep_gemm/jit/compiler.py
+++ b/deep_gemm/jit/compiler.py
@@ -96,7 +96,8 @@ def put(path, data, is_binary=False):

 def build(name: str, arg_defs: tuple, code: str) -> Runtime:
    # Compiler flags
-    nvcc_flags = ['-std=c++20', '-shared', '-O3', '--expt-relaxed-constexpr', '--expt-extended-lambda',
+    cpp_standard = int(os.getenv('DG_NVCC_OVERRIDE_CPP_STANDARD', 20))
+    nvcc_flags = [f'-std=c++{cpp_standard}', '-shared', '-O3', '--expt-relaxed-constexpr', '--expt-extended-lambda',
                  '-gencode=arch=compute_90a,code=sm_90a',
                  '--ptxas-options=--register-usage-level=10' + (',--verbose' if 'DG_PTXAS_VERBOSE' in os.environ else ''),
                  # Suppress some unnecessary warnings, such as unused variables for certain `constexpr` branch cases