Add nvidia-cdi-refresh service
Some checks failed
CI Pipeline / code-scanning (push) Has been cancelled
CI Pipeline / variables (push) Has been cancelled
CI Pipeline / golang (push) Has been cancelled
CI Pipeline / image (push) Has been cancelled
CI Pipeline / e2e-test (push) Has been cancelled

Automatic regeneration of /var/run/cdi/nvidia.yaml
New units:
	•	nvidia-cdi-refresh.service – one-shot wrapper for
			nvidia-ctk cdi generate (adds sleep + required caps).
	•	nvidia-cdi-refresh.path   – fires on driver install/upgrade via
			modules.dep.bin changes.
Packaging
	•	RPM %post reloads systemd and enables the path unit on fresh
			installs.
	•	DEB postinst does the same (configure, skip on upgrade).

Result: CDI spec is always up to date

Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
This commit is contained in:
Carlos Eduardo Arango Gutierrez 2025-05-12 15:22:42 +02:00
parent 890db82b46
commit 8865f6d848
No known key found for this signature in database
GPG Key ID: 42D9CB42F300A852
10 changed files with 97 additions and 1 deletions

View File

@ -0,0 +1,23 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[Unit]
Description=Trigger CDI refresh on NVIDIA driver install / uninstall events
[Path]
PathChanged=/lib/modules/%v/modules.dep
PathChanged=/lib/modules/%v/modules.dep.bin
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,27 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[Unit]
Description=Refresh NVIDIA CDI specification file
ConditionPathExists=/usr/bin/nvidia-smi
ConditionPathExists=/usr/bin/nvidia-ctk
[Service]
Type=oneshot
ExecCondition=/usr/bin/grep -qE '/nvidia.ko' /lib/modules/%v/modules.dep
ExecStart=/usr/bin/nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml
CapabilityBoundingSet=CAP_SYS_MODULE CAP_SYS_ADMIN CAP_MKNOD
[Install]
WantedBy=multi-user.target

View File

@ -55,6 +55,7 @@ RUN make PREFIX=${DIST_DIR} cmds
WORKDIR $DIST_DIR
COPY packaging/debian ./debian
COPY deployments/systemd/ .
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}

View File

@ -46,6 +46,7 @@ RUN make PREFIX=${DIST_DIR} cmds
WORKDIR $DIST_DIR/..
COPY packaging/rpm .
COPY deployments/systemd/ .
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}

View File

@ -71,6 +71,7 @@ RUN make PREFIX=${DIST_DIR} cmds
WORKDIR $DIST_DIR/..
COPY packaging/rpm .
COPY deployments/systemd/ ${DIST_DIR}/
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}

View File

@ -53,6 +53,7 @@ RUN make PREFIX=${DIST_DIR} cmds
WORKDIR $DIST_DIR
COPY packaging/debian ./debian
COPY deployments/systemd/ .
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}

View File

@ -1,3 +1,5 @@
nvidia-container-runtime /usr/bin
nvidia-ctk /usr/bin
nvidia-cdi-hook /usr/bin
nvidia-cdi-refresh.service /etc/systemd/system/
nvidia-cdi-refresh.path /etc/systemd/system/

View File

@ -5,6 +5,16 @@ set -e
case "$1" in
configure)
/usr/bin/nvidia-ctk --quiet config --config-file=/etc/nvidia-container-runtime/config.toml --in-place
if command -v systemctl >/dev/null 2>&1 \
&& systemctl --quiet is-system-running 2>/dev/null; then
systemctl daemon-reload || true
if [ -z "$2" ]; then # $2 empty → first install
systemctl enable --now nvidia-cdi-refresh.path || true
fi
fi
;;
abort-upgrade|abort-remove|abort-deconfigure)

View File

@ -5,3 +5,14 @@
%:
dh $@
override_dh_fixperms:
dh_fixperms
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime-hook || true
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime || true
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime.cdi || true
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime.legacy || true
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-ctk || true
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-cdi-hook || true
chmod 644 debian/$(shell dh_listpackages)/etc/systemd/system/nvidia-cdi-refresh.service || true
chmod 644 debian/$(shell dh_listpackages)/etc/systemd/system/nvidia-cdi-refresh.path || true

View File

@ -17,6 +17,8 @@ Source3: nvidia-container-runtime
Source4: nvidia-container-runtime.cdi
Source5: nvidia-container-runtime.legacy
Source6: nvidia-cdi-hook
Source7: nvidia-cdi-refresh.service
Source8: nvidia-cdi-refresh.path
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
Provides: nvidia-container-runtime
@ -28,16 +30,20 @@ Requires: nvidia-container-toolkit-base == %{version}-%{release}
Provides tools and utilities to enable GPU support in containers.
%prep
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} .
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} %{SOURCE7} %{SOURCE8} .
%install
mkdir -p %{buildroot}%{_bindir}
mkdir -p %{buildroot}%{_sysconfdir}/systemd/system/
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy
install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk
install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook
install -m 644 -t %{buildroot}%{_sysconfdir}/systemd/system nvidia-cdi-refresh.service
install -m 644 -t %{buildroot}%{_sysconfdir}/systemd/system nvidia-cdi-refresh.path
%post
if [ $1 -gt 1 ]; then # only on package upgrade
@ -45,6 +51,17 @@ if [ $1 -gt 1 ]; then # only on package upgrade
cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
fi
# Reload systemd unit cache
if command -v systemctl >/dev/null 2>&1 \
&& systemctl --quiet is-system-running 2>/dev/null; then
systemctl daemon-reload || true
# On fresh install ($1 == 1) enable the path unit so it starts at boot
if [ "$1" -eq 1 ]; then
systemctl enable --now nvidia-cdi-refresh.path || true
fi
fi
%posttrans
if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then
# repairing lost file nvidia-container-runtime-hook
@ -89,6 +106,8 @@ Provides tools such as the NVIDIA Container Runtime and NVIDIA Container Toolkit
%{_bindir}/nvidia-container-runtime
%{_bindir}/nvidia-ctk
%{_bindir}/nvidia-cdi-hook
%{_sysconfdir}/systemd/system/nvidia-cdi-refresh.service
%{_sysconfdir}/systemd/system/nvidia-cdi-refresh.path
# The OPERATOR EXTENSIONS package consists of components that are required to enable GPU support in Kubernetes.
# This package is not distributed as part of the NVIDIA Container Toolkit RPMs.