mirror of
https://github.com/deepseek-ai/DeepGEMM
synced 2025-06-26 23:15:49 +00:00
Add DG_PRINT_AUTOTUNE to README
This commit is contained in:
parent
daec8fd2fc
commit
085b4a1532
@ -120,6 +120,8 @@ The library also provides some environment variables, which may be useful:
|
|||||||
- `DG_JIT_PRINT_COMPILER_COMMAND`: `0` or `1`, print NVCC compilation command, `0` by default
|
- `DG_JIT_PRINT_COMPILER_COMMAND`: `0` or `1`, print NVCC compilation command, `0` by default
|
||||||
- Post optimization
|
- Post optimization
|
||||||
- `DG_JIT_DISABLE_FFMA_INTERLEAVE`: `0` or `1`, disable FFMA-interleaving optimization, `0` by default
|
- `DG_JIT_DISABLE_FFMA_INTERLEAVE`: `0` or `1`, disable FFMA-interleaving optimization, `0` by default
|
||||||
|
- Heuristic selection
|
||||||
|
- `DG_PRINT_AUTOTUNE`: `0` or `1`, print the selected config (tuned keys and measured time) for each shape, `0` by default
|
||||||
- Testing
|
- Testing
|
||||||
- `DG_NSYS_PROFILING`: `0` or `1`, Nsight Systems-compatible testing, `0` by default
|
- `DG_NSYS_PROFILING`: `0` or `1`, Nsight Systems-compatible testing, `0` by default
|
||||||
|
|
||||||
|
|||||||
@ -74,8 +74,7 @@ class JITTuner:
|
|||||||
|
|
||||||
# Cache the best runtime and return
|
# Cache the best runtime and return
|
||||||
if int(os.getenv('DG_JIT_DEBUG', 0)) or int(os.getenv('DG_PRINT_AUTOTUNE', 0)):
|
if int(os.getenv('DG_JIT_DEBUG', 0)) or int(os.getenv('DG_PRINT_AUTOTUNE', 0)):
|
||||||
print(
|
print(f'Best JIT kernel {name} with keys {keys} has tuned keys {best_keys} and time {best_time}')
|
||||||
f'Best JIT kernel {name} with keys {keys} has tuned keys {best_keys} and time {best_time}')
|
|
||||||
self.tuned[signature] = (best_runtime, best_keys)
|
self.tuned[signature] = (best_runtime, best_keys)
|
||||||
return best_runtime, best_keys
|
return best_runtime, best_keys
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user