mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Add nvidia-cdi-refresh service
Some checks failed
Some checks failed
Automatic regeneration of /var/run/cdi/nvidia.yaml New units: • nvidia-cdi-refresh.service – one-shot wrapper for nvidia-ctk cdi generate (adds sleep + required caps). • nvidia-cdi-refresh.path – fires on driver install/upgrade via modules.dep.bin changes. • 60-nvidia-cdi-refresh.rules – udev triggers for module add/remove, PCI bind/unbind/change, and MIG /dev/nvidia-caps* char-device events. Packaging • RPM %post reloads udev/systemd and enables the path unit on fresh installs. • DEB postinst does the same (configure, skip on upgrade). Result: CDI spec is always up to date Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
This commit is contained in:
parent
adb5e6719d
commit
a641d387c5
23
deployments/systemd/nvidia-cdi-refresh.path
Normal file
23
deployments/systemd/nvidia-cdi-refresh.path
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
[Unit]
Description=Trigger CDI refresh on NVIDIA driver install / uninstall events

[Path]
# Watch the binary module-dependency index of the running kernel
# (%v expands to the kernel release). depmod rewrites this file once per
# driver (un)install, so a change here signals that the installed modules
# have changed.
# No Unit= is given, so the service sharing this unit's name is activated.
PathChanged=/lib/modules/%v/modules.dep.bin

[Install]
WantedBy=multi-user.target
|
||||||
26
deployments/systemd/nvidia-cdi-refresh.service
Normal file
26
deployments/systemd/nvidia-cdi-refresh.service
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
[Unit]
Description=Refresh NVIDIA CDI specification file

[Service]
# One-shot job: it runs to completion on each activation and then exits.
Type=oneshot
# Upper bound on the capabilities available to the commands below.
CapabilityBoundingSet=CAP_SYS_MODULE CAP_SYS_ADMIN CAP_MKNOD
# Wait 30 seconds before generating the spec so that dependent services or
# resources are fully initialized.
ExecStartPre=/bin/sleep 30
# Regenerate the CDI specification and write it to /var/run/cdi/nvidia.yaml.
ExecStart=/usr/bin/nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml

[Install]
WantedBy=multi-user.target
|
||||||
21
deployments/udev/60-nvidia-cdi-refresh.rules
Normal file
21
deployments/udev/60-nvidia-cdi-refresh.rules
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# Start the CDI refresh service explicitly with RUN+= instead of relying on
# TAG+="systemd" / ENV{SYSTEMD_WANTS}: systemd only acts on SYSTEMD_WANTS when
# a device unit first becomes active, so it never fires for "remove" or
# "unbind" events, nor for "bind"/"change" events on an already-active device.
# "--no-block" only queues the start job, keeping the udev worker short-lived;
# starting a service that is already running is a no-op, so bursts of events
# (nvidia, nvidia_uvm, nvidia_modeset, ...) collapse into a single refresh.

# NVIDIA kernel-module events
ACTION=="add|remove", SUBSYSTEM=="module", KERNEL=="nvidia*", \
    RUN+="/bin/systemctl --no-block start nvidia-cdi-refresh.service"

# Bind/unbind/change of a GPU PCI function to the NVIDIA driver
ACTION=="bind|unbind|change", SUBSYSTEM=="pci", DRIVER=="nvidia", \
    RUN+="/bin/systemctl --no-block start nvidia-cdi-refresh.service"
|
||||||
@ -55,6 +55,8 @@ RUN make PREFIX=${DIST_DIR} cmds
|
|||||||
|
|
||||||
WORKDIR $DIST_DIR
|
WORKDIR $DIST_DIR
|
||||||
COPY packaging/debian ./debian
|
COPY packaging/debian ./debian
|
||||||
|
COPY deployments/systemd/ .
|
||||||
|
COPY deployments/udev/ .
|
||||||
|
|
||||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||||
|
|||||||
@ -46,6 +46,8 @@ RUN make PREFIX=${DIST_DIR} cmds
|
|||||||
|
|
||||||
WORKDIR $DIST_DIR/..
|
WORKDIR $DIST_DIR/..
|
||||||
COPY packaging/rpm .
|
COPY packaging/rpm .
|
||||||
|
# WORKDIR is ${DIST_DIR}/.., but the RPM spec sources (built into ${DIST_DIR}
# by `make PREFIX=${DIST_DIR} cmds`) live in ${DIST_DIR}; copy the systemd
# units there so the spec can find them as SourceN files.
COPY deployments/systemd/* ${DIST_DIR}/
|
||||||
|
# WORKDIR is ${DIST_DIR}/.., but the RPM spec sources (built into ${DIST_DIR}
# by `make PREFIX=${DIST_DIR} cmds`) live in ${DIST_DIR}; copy the udev rules
# there so the spec can find them as a SourceN file.
COPY deployments/udev/* ${DIST_DIR}/
|
||||||
|
|
||||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||||
|
|||||||
@ -71,6 +71,8 @@ RUN make PREFIX=${DIST_DIR} cmds
|
|||||||
|
|
||||||
WORKDIR $DIST_DIR/..
|
WORKDIR $DIST_DIR/..
|
||||||
COPY packaging/rpm .
|
COPY packaging/rpm .
|
||||||
|
COPY deployments/systemd/* ${DIST_DIR}/
|
||||||
|
COPY deployments/udev/* ${DIST_DIR}/
|
||||||
|
|
||||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||||
|
|||||||
@ -53,6 +53,8 @@ RUN make PREFIX=${DIST_DIR} cmds
|
|||||||
|
|
||||||
WORKDIR $DIST_DIR
|
WORKDIR $DIST_DIR
|
||||||
COPY packaging/debian ./debian
|
COPY packaging/debian ./debian
|
||||||
|
COPY deployments/systemd/ .
|
||||||
|
COPY deployments/udev/ .
|
||||||
|
|
||||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||||
|
|||||||
@ -29,3 +29,9 @@ Architecture: any
|
|||||||
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@)
|
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@)
|
||||||
Description: NVIDIA Container Toolkit Operator Extensions
|
Description: NVIDIA Container Toolkit Operator Extensions
|
||||||
Provides tools for using the NVIDIA Container Toolkit with the GPU Operator
|
Provides tools for using the NVIDIA Container Toolkit with the GPU Operator
|
||||||
|
|
||||||
|
Package: nvidia-container-toolkit-cdi-refresh
|
||||||
|
Architecture: any
|
||||||
|
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@)
|
||||||
|
Description: NVIDIA CDI Refresh Service
|
||||||
|
Provides a service to refresh the NVIDIA CDI specification
|
||||||
|
|||||||
@ -0,0 +1,3 @@
|
|||||||
|
60-nvidia-cdi-refresh.rules /etc/udev/rules.d/
|
||||||
|
nvidia-cdi-refresh.service /etc/systemd/system/
|
||||||
|
nvidia-cdi-refresh.path /etc/systemd/system/
|
||||||
32
packaging/debian/nvidia-container-toolkit-cdi-refresh.postinst
Executable file
32
packaging/debian/nvidia-container-toolkit-cdi-refresh.postinst
Executable file
@ -0,0 +1,32 @@
|
|||||||
|
#!/bin/sh
# postinst for nvidia-container-toolkit-cdi-refresh.
#
# On "configure":
#   * reloads the udev rules so the newly installed rule file is picked up;
#   * reloads the systemd unit cache when systemd is the running init;
#   * on first install only, enables nvidia-cdi-refresh.path (and starts it
#     immediately when systemd is running).
# Every step is best-effort: a failure never fails the package installation.

set -e

case "$1" in
    configure)
        if command -v udevadm >/dev/null 2>&1; then
            udevadm control --reload || true
        fi

        if command -v systemctl >/dev/null 2>&1; then
            # /run/systemd/system exists only when systemd is the running
            # init. Do not use `systemctl is-system-running` for this: it
            # exits non-zero whenever the system state is not "running"
            # (e.g. "degraded" because some unrelated unit failed, or
            # "starting" during boot), which would silently skip enabling
            # the path unit on otherwise healthy hosts.
            if [ -d /run/systemd/system ]; then
                systemd_running=yes
                systemctl daemon-reload || true
            else
                systemd_running=no
            fi

            # dpkg passes the most recently configured version as $2; it is
            # empty on a first install, so upgrades keep the admin's choice.
            if [ -z "$2" ]; then
                if [ "$systemd_running" = yes ]; then
                    systemctl enable --now nvidia-cdi-refresh.path || true
                else
                    # systemd is installed but not running (chroot, image
                    # build): still enable the unit so that it is active on
                    # the next boot; there is nothing to start right now.
                    systemctl enable nvidia-cdi-refresh.path || true
                fi
            fi
        fi
        ;;

    abort-upgrade|abort-remove|abort-deconfigure)
        # Nothing to do for these dpkg abort cases
        ;;

    *)
        echo "postinst called with unknown argument \`$1'" >&2
        exit 1
        ;;
esac

exit 0
|
||||||
@ -17,6 +17,9 @@ Source3: nvidia-container-runtime
|
|||||||
Source4: nvidia-container-runtime.cdi
|
Source4: nvidia-container-runtime.cdi
|
||||||
Source5: nvidia-container-runtime.legacy
|
Source5: nvidia-container-runtime.legacy
|
||||||
Source6: nvidia-cdi-hook
|
Source6: nvidia-cdi-hook
|
||||||
|
Source7: nvidia-cdi-refresh.service
|
||||||
|
Source8: nvidia-cdi-refresh.path
|
||||||
|
Source9: 60-nvidia-cdi-refresh.rules
|
||||||
|
|
||||||
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
|
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
|
||||||
Provides: nvidia-container-runtime
|
Provides: nvidia-container-runtime
|
||||||
@ -28,16 +31,22 @@ Requires: nvidia-container-toolkit-base == %{version}-%{release}
|
|||||||
Provides tools and utilities to enable GPU support in containers.
|
Provides tools and utilities to enable GPU support in containers.
|
||||||
|
|
||||||
%prep
|
%prep
|
||||||
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} .
|
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} %{SOURCE7} %{SOURCE8} %{SOURCE9} .
|
||||||
|
|
||||||
%install
|
%install
|
||||||
mkdir -p %{buildroot}%{_bindir}
|
mkdir -p %{buildroot}%{_bindir}
|
||||||
|
mkdir -p %{buildroot}/etc/systemd/system/
|
||||||
|
mkdir -p %{buildroot}/etc/udev/rules.d
|
||||||
|
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook
|
||||||
|
install -m 644 -t %{buildroot}/etc/systemd/system %{SOURCE7}
|
||||||
|
install -m 644 -t %{buildroot}/etc/systemd/system %{SOURCE8}
|
||||||
|
install -m 644 -t %{buildroot}/etc/udev/rules.d %{SOURCE9}
|
||||||
|
|
||||||
%post
|
%post
|
||||||
if [ $1 -gt 1 ]; then # only on package upgrade
|
if [ $1 -gt 1 ]; then # only on package upgrade
|
||||||
@ -45,6 +54,17 @@ if [ $1 -gt 1 ]; then # only on package upgrade
|
|||||||
cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
|
cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Reload udev so the new rule is active immediately
|
||||||
|
/usr/bin/udevadm control --reload || :
|
||||||
|
|
||||||
|
# Reload systemd unit cache
|
||||||
|
/bin/systemctl daemon-reload || :
|
||||||
|
|
||||||
|
# On fresh install ($1 == 1) enable the path unit so it starts at boot
|
||||||
|
if [ "$1" -eq 1 ]; then
|
||||||
|
/bin/systemctl enable --now nvidia-cdi-refresh.path || :
|
||||||
|
fi
|
||||||
|
|
||||||
%posttrans
|
%posttrans
|
||||||
if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then
|
if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then
|
||||||
# repairing lost file nvidia-container-runtime-hook
|
# repairing lost file nvidia-container-runtime-hook
|
||||||
@ -64,6 +84,11 @@ fi
|
|||||||
%files
|
%files
|
||||||
%license LICENSE
|
%license LICENSE
|
||||||
%{_bindir}/nvidia-container-runtime-hook
|
%{_bindir}/nvidia-container-runtime-hook
|
||||||
|
%config /etc/systemd/system/nvidia-cdi-refresh.service
|
||||||
|
%config /etc/systemd/system/nvidia-cdi-refresh.path
|
||||||
|
%dir /etc/systemd/system
|
||||||
|
%config /etc/udev/rules.d/60-nvidia-cdi-refresh.rules
|
||||||
|
%dir /etc/udev/rules.d
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
# As of 1.10.0-1 we generate the release information automatically
|
# As of 1.10.0-1 we generate the release information automatically
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user