Unify kwargs usage

This commit is contained in:
Chenggang Zhao
2025-05-15 16:53:52 +08:00
parent 350989eef3
commit 3b412f458a
6 changed files with 14 additions and 15 deletions

View File

@@ -1,4 +1,3 @@
import copy
import os
import subprocess
import time
@@ -27,14 +26,14 @@ class Runtime:
return all(os.path.exists(os.path.join(path, file)) for file in files)
@staticmethod
def generate(**kwargs) -> str:
def generate(kwargs: Dict[str, Any]) -> str:
raise NotImplemented
@staticmethod
def launch(kernel: cbd.CUkernel, kwargs: Dict[str, Any]) -> cbd.CUresult:
    """Launch the compiled CUDA kernel with the given runtime arguments.

    Abstract hook: subclasses must override this to set up grid/block
    configuration from `kwargs` and invoke `kernel`, returning the CUDA
    driver result code.

    Bug fix: the original `raise NotImplemented` raises
    `TypeError: exceptions must derive from BaseException` at call time,
    because `NotImplemented` is a sentinel value, not an exception class;
    `NotImplementedError` is the correct exception for an abstract stub.
    """
    raise NotImplementedError
def __call__(self, **kwargs) -> cbd.CUresult:
def __call__(self, kwargs: Dict[str, Any]) -> cbd.CUresult:
# Load CUBIN
if self.kernel is None:
start_time = time.time_ns()

View File

@@ -237,6 +237,6 @@ def gemm_fp8_fp8_bf16_nt(lhs: Tuple[torch.Tensor, torch.Tensor],
}
# Generate, build and run the kernel
code = FP8GemmRuntime.generate(**kwargs)
code = FP8GemmRuntime.generate(kwargs)
runtime = build('gemm_fp8_fp8_bf16_nt', code, FP8GemmRuntime, kwargs)
runtime(**kwargs)
runtime(kwargs)

View File

@@ -101,9 +101,9 @@ def m_grouped_gemm_fp8_fp8_bf16_nt_contiguous(lhs: Tuple[torch.Tensor, torch.Ten
}
# Generate, build and run the kernel
code = FP8GemmRuntime.generate(**kwargs)
code = FP8GemmRuntime.generate(kwargs)
runtime = build('m_grouped_gemm_fp8_fp8_bf16_nt', code, FP8GemmRuntime, kwargs)
runtime(**kwargs)
runtime(kwargs)
def m_grouped_gemm_fp8_fp8_bf16_nt_masked(lhs: Tuple[torch.Tensor, torch.Tensor],
@@ -200,6 +200,6 @@ def m_grouped_gemm_fp8_fp8_bf16_nt_masked(lhs: Tuple[torch.Tensor, torch.Tensor]
}
# Generate, build and run the kernel
code = FP8GemmRuntime.generate(**kwargs)
code = FP8GemmRuntime.generate(kwargs)
runtime = build('m_grouped_gemm_fp8_fp8_bf16_nt', code, FP8GemmRuntime, kwargs)
runtime(**kwargs)
runtime(kwargs)

View File

@@ -138,7 +138,7 @@ class FP8GemmRuntime(Runtime):
super().__init__(path)
@staticmethod
def generate(**kwargs) -> str:
def generate(kwargs: Dict[str, Any]) -> str:
code = f'''
#ifdef __CUDACC_RTC__
#include <deep_gemm/nvrtc_std.cuh>
@@ -233,7 +233,7 @@ class FP8WGradGemmRuntime(Runtime):
super().__init__(path)
@staticmethod
def generate(**kwargs) -> str:
def generate(kwargs: Dict[str, Any]) -> str:
code = f'''
#ifdef __CUDACC_RTC__
#include <deep_gemm/nvrtc_std.cuh>

View File

@@ -108,9 +108,9 @@ def wgrad_gemm_fp8_fp8_fp32_nt(lhs: Tuple[torch.Tensor, torch.Tensor],
}
# Generate, build and run the kernel
code = FP8WGradGemmRuntime.generate(**kwargs)
code = FP8WGradGemmRuntime.generate(kwargs)
runtime = build('wgrad_gemm_fp8_fp8_fp32_nt', code, FP8WGradGemmRuntime, kwargs)
runtime(**kwargs)
runtime(kwargs)
def k_grouped_wgrad_gemm_fp8_fp8_fp32_nt(lhs: Tuple[torch.Tensor, torch.Tensor],

View File

@@ -16,7 +16,7 @@ class VectorAddRuntime(jit.Runtime):
super().__init__(path)
@staticmethod
def generate(**kwargs) -> str:
def generate(kwargs: Dict[str, Any]) -> str:
return f"""
#ifdef __CUDACC_RTC__
#include <deep_gemm/nvrtc_std.cuh>