mirror of
https://github.com/deepseek-ai/DeepGEMM
synced 2025-06-26 23:15:49 +00:00
Add DG_PRINT_AUTOTUNE to README
This commit is contained in:
parent
daec8fd2fc
commit
085b4a1532
@ -120,6 +120,8 @@ The library also provides some environment variables, which may be useful:
|
||||
- `DG_JIT_PRINT_COMPILER_COMMAND`: `0` or `1`, print NVCC compilation command, `0` by default
|
||||
- Post optimization
|
||||
- `DG_JIT_DISABLE_FFMA_INTERLEAVE`: `0` or `1`, disable FFMA-interleaving optimization, `0` by default
|
||||
- Heuristic selection
|
||||
- `DG_PRINT_AUTOTUNE`: `0` or `1`, print selected configs for each shape, `0` by default
|
||||
- Testing
|
||||
- `DG_NSYS_PROFILING`: `0` or `1`, Nsight Systems-compatible testing, `0` by default
|
||||
|
||||
|
@ -74,8 +74,7 @@ class JITTuner:
|
||||
|
||||
# Cache the best runtime and return
|
||||
if int(os.getenv('DG_JIT_DEBUG', 0)) or int(os.getenv('DG_PRINT_AUTOTUNE', 0)):
|
||||
print(
|
||||
f'Best JIT kernel {name} with keys {keys} has tuned keys {best_keys} and time {best_time}')
|
||||
print(f'Best JIT kernel {name} with keys {keys} has tuned keys {best_keys} and time {best_time}')
|
||||
self.tuned[signature] = (best_runtime, best_keys)
|
||||
return best_runtime, best_keys
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user