mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Add nvidia-cdi-refresh service
Some checks are pending
Some checks are pending
Automatic regeneration of /var/run/cdi/nvidia.yaml New units: • nvidia-cdi-refresh.service – one-shot wrapper for nvidia-ctk cdi generate (adds sleep + required caps). • nvidia-cdi-refresh.path – fires on driver install/upgrade via modules.dep.bin changes. • 60-nvidia-cdi-refresh.rules – udev triggers for module add/remove, PCI bind/unbind/change, and MIG /dev/nvidia-caps* char-device events. Packaging • RPM %post reloads udev/systemd and enables the path unit on fresh installs. • DEB postinst does the same (configure, skip on upgrade). Result: CDI spec is always up to date Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
This commit is contained in:
parent
adb5e6719d
commit
512272201c
23
deployments/systemd/nvidia-cdi-refresh.path
Normal file
23
deployments/systemd/nvidia-cdi-refresh.path
Normal file
@ -0,0 +1,23 @@
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
[Unit]
|
||||
Description=Trigger CDI refresh on NVIDIA driver install / uninstall events
|
||||
|
||||
[Path]
|
||||
# depmod rewrites this file exactly once per driver (un)install
|
||||
PathChanged=/lib/modules/%v/modules.dep.bin
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
26
deployments/systemd/nvidia-cdi-refresh.service
Normal file
26
deployments/systemd/nvidia-cdi-refresh.service
Normal file
@ -0,0 +1,26 @@
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
[Unit]
|
||||
Description=Refresh NVIDIA CDI specification file
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
# Fixed 30-second delay before generating the spec, to give dependent services and resources time to finish initializing.
|
||||
ExecStartPre=/bin/sleep 30
|
||||
ExecStart=/usr/bin/nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml
|
||||
CapabilityBoundingSet=CAP_SYS_MODULE CAP_SYS_ADMIN CAP_MKNOD
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
21
deployments/udev/60-nvidia-cdi-refresh.rules
Normal file
21
deployments/udev/60-nvidia-cdi-refresh.rules
Normal file
@ -0,0 +1,21 @@
|
||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# NVIDIA kernel-module events: on add/remove of a module whose kernel name
# matches nvidia*, tag the device for systemd and request that
# nvidia-cdi-refresh.service be started.
# NOTE(review): systemd.device(5) states that Wants= dependencies coming from
# SYSTEMD_WANTS are only acted on when a device first becomes active - confirm
# that the "remove" action actually triggers a refresh.
|
||||
ACTION=="add|remove", SUBSYSTEM=="module", KERNEL=="nvidia*", \
|
||||
TAG+="systemd", ENV{SYSTEMD_WANTS}+="nvidia-cdi-refresh.service"
|
||||
|
||||
# First bind/unbind/change of a GPU PCI function to the NVIDIA driver: same
# systemd tag and service request as the module rule, matched on PCI devices
# whose driver is "nvidia".
# NOTE(review): the same SYSTEMD_WANTS caveat applies to "unbind" and "change"
# events on a device that is already active - verify.
|
||||
ACTION=="bind|unbind|change", SUBSYSTEM=="pci", DRIVER=="nvidia", \
|
||||
TAG+="systemd", ENV{SYSTEMD_WANTS}+="nvidia-cdi-refresh.service"
|
@ -55,6 +55,8 @@ RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
WORKDIR $DIST_DIR
|
||||
COPY packaging/debian ./debian
|
||||
COPY deployments/systemd/ .
|
||||
COPY deployments/udev/ .
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
@ -46,6 +46,8 @@ RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
COPY deployments/systemd/ .
|
||||
COPY deployments/udev/ .
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
@ -71,6 +71,8 @@ RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
COPY deployments/systemd/* ${DIST_DIR}/
|
||||
COPY deployments/udev/* ${DIST_DIR}/
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
@ -53,6 +53,8 @@ RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
WORKDIR $DIST_DIR
|
||||
COPY packaging/debian ./debian
|
||||
COPY deployments/systemd/ .
|
||||
COPY deployments/udev/ .
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
@ -29,3 +29,9 @@ Architecture: any
|
||||
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@)
|
||||
Description: NVIDIA Container Toolkit Operator Extensions
|
||||
Provides tools for using the NVIDIA Container Toolkit with the GPU Operator
|
||||
|
||||
Package: nvidia-container-toolkit-cdi-refresh
|
||||
Architecture: any
|
||||
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@)
|
||||
Description: NVIDIA CDI Refresh Service
|
||||
Provides a service to refresh the NVIDIA CDI specification
|
||||
|
@ -0,0 +1,3 @@
|
||||
60-nvidia-cdi-refresh.rules /etc/udev/rules.d/
|
||||
nvidia-cdi-refresh.service /usr/lib/systemd/system/
|
||||
nvidia-cdi-refresh.path /usr/lib/systemd/system/
|
32
packaging/debian/nvidia-container-toolkit-cdi-refresh.postinst
Executable file
32
packaging/debian/nvidia-container-toolkit-cdi-refresh.postinst
Executable file
@ -0,0 +1,32 @@
|
||||
#!/bin/sh
# postinst for nvidia-container-toolkit-cdi-refresh.
#
# On "configure" this reloads the udev rules and the systemd unit cache, and on
# a first install it enables and starts nvidia-cdi-refresh.path. Every
# udevadm/systemctl call is best-effort (|| true) so a failure there never
# aborts package configuration.

set -e

case "$1" in
    configure)
        if command -v udevadm >/dev/null 2>&1; then
            udevadm control --reload || true
        fi

        # Detect a running systemd by the presence of /run/systemd/system
        # (the check documented in sd_booted(3)). "systemctl is-system-running"
        # is not suitable here: it exits non-zero for states such as
        # "degraded" or "starting", which would skip the steps below on an
        # otherwise working host.
        if command -v systemctl >/dev/null 2>&1 \
            && [ -d /run/systemd/system ]; then

            systemctl daemon-reload || true

            # $2 (the previously configured version) is empty on first install;
            # on upgrade the administrator's enable/disable choice is kept.
            if [ -z "$2" ]; then
                systemctl enable --now nvidia-cdi-refresh.path || true
            fi
        fi
        ;;

    abort-upgrade|abort-remove|abort-deconfigure)
        # Nothing to do for these dpkg abort cases
        ;;

    *)
        echo "postinst called with unknown argument \`$1'" >&2
        exit 1
        ;;
esac

exit 0
|
@ -17,6 +17,9 @@ Source3: nvidia-container-runtime
|
||||
Source4: nvidia-container-runtime.cdi
|
||||
Source5: nvidia-container-runtime.legacy
|
||||
Source6: nvidia-cdi-hook
|
||||
Source7: nvidia-cdi-refresh.service
|
||||
Source8: nvidia-cdi-refresh.path
|
||||
Source9: 60-nvidia-cdi-refresh.rules
|
||||
|
||||
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
|
||||
Provides: nvidia-container-runtime
|
||||
@ -28,16 +31,22 @@ Requires: nvidia-container-toolkit-base == %{version}-%{release}
|
||||
Provides tools and utilities to enable GPU support in containers.
|
||||
|
||||
%prep
|
||||
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} .
|
||||
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} %{SOURCE7} %{SOURCE8} %{SOURCE9} .
|
||||
|
||||
%install
|
||||
mkdir -p %{buildroot}%{_bindir}
|
||||
mkdir -p %{buildroot}/usr/lib/systemd/system
|
||||
mkdir -p %{buildroot}/etc/udev/rules.d
|
||||
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk
|
||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook
|
||||
install -m 644 -t %{buildroot}/usr/lib/systemd/system %{SOURCE7}
|
||||
install -m 644 -t %{buildroot}/usr/lib/systemd/system %{SOURCE8}
|
||||
install -m 644 -t %{buildroot}/etc/udev/rules.d %{SOURCE9}
|
||||
|
||||
%post
|
||||
if [ $1 -gt 1 ]; then # only on package upgrade
|
||||
@ -45,6 +54,17 @@ if [ $1 -gt 1 ]; then # only on package upgrade
|
||||
cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
|
||||
fi
|
||||
|
||||
# Reload udev so the new rule is active immediately
|
||||
/usr/bin/udevadm control --reload || :
|
||||
|
||||
# Reload systemd unit cache
|
||||
/bin/systemctl daemon-reload || :
|
||||
|
||||
# On fresh install ($1 == 1) enable the path unit so it starts at boot
|
||||
if [ "$1" -eq 1 ]; then
|
||||
/bin/systemctl enable --now nvidia-cdi-refresh.path || :
|
||||
fi
|
||||
|
||||
%posttrans
|
||||
if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then
|
||||
# repairing lost file nvidia-container-runtime-hook
|
||||
@ -64,6 +84,9 @@ fi
|
||||
%files
|
||||
%license LICENSE
|
||||
%{_bindir}/nvidia-container-runtime-hook
|
||||
%config(noreplace) %{_unitdir}/nvidia-cdi-refresh.service
|
||||
%config(noreplace) %{_unitdir}/nvidia-cdi-refresh.path
|
||||
# The udev rule is installed under /etc/udev/rules.d by the install section,
# so it has to be listed at that same path here.
%config /etc/udev/rules.d/60-nvidia-cdi-refresh.rules
|
||||
|
||||
%changelog
|
||||
# As of 1.10.0-1 we generate the release information automatically
|
||||
|
Loading…
Reference in New Issue
Block a user