From 51cc3da30d93c3784931569a666543314d874e7e Mon Sep 17 00:00:00 2001
From: Carlos Eduardo Arango Gutierrez
Date: Mon, 12 May 2025 15:22:42 +0200
Subject: [PATCH] Add nvidia-cdi-refresh service
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Automatic regeneration of /var/run/cdi/nvidia.yaml

New units:
 • nvidia-cdi-refresh.service – one-shot wrapper for nvidia-ctk cdi generate
   (skipped unless modules.dep lists nvidia.ko; restricted capability set).
 • nvidia-cdi-refresh.path – fires on driver install/uninstall via
   modules.dep / modules.dep.bin changes.

Packaging
 • RPM %post reloads systemd and, on fresh installs, enables the path unit
   and the service.
 • DEB postinst does the same (configure, skip on upgrade) whenever systemd
   is the running init, including on a degraded system.

Result: the CDI spec is regenerated at boot and after every driver change.

Signed-off-by: Carlos Eduardo Arango Gutierrez
---
 deployments/systemd/nvidia-cdi-refresh.path   | 26 ++++++++++++++
 .../systemd/nvidia-cdi-refresh.service        | 29 ++++++++++++++++
 docker/Dockerfile.debian                      |  2 ++
 docker/Dockerfile.opensuse-leap               |  2 ++
 docker/Dockerfile.rpm-yum                     |  2 ++
 docker/Dockerfile.ubuntu                      |  2 ++
 packaging/debian/control                      |  6 ++++
 ...idia-container-toolkit-cdi-refresh.install |  2 ++
 ...dia-container-toolkit-cdi-refresh.postinst | 34 +++++++++++++++++++
 .../rpm/SPECS/nvidia-container-toolkit.spec   | 20 ++++++++++-
 10 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 deployments/systemd/nvidia-cdi-refresh.path
 create mode 100644 deployments/systemd/nvidia-cdi-refresh.service
 create mode 100644 packaging/debian/nvidia-container-toolkit-cdi-refresh.install
 create mode 100755 packaging/debian/nvidia-container-toolkit-cdi-refresh.postinst

diff --git a/deployments/systemd/nvidia-cdi-refresh.path b/deployments/systemd/nvidia-cdi-refresh.path
new file mode 100644
index 00000000..a1c1e241
--- /dev/null
+++ b/deployments/systemd/nvidia-cdi-refresh.path
@@ -0,0 +1,26 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[Unit]
+Description=Trigger CDI refresh on NVIDIA driver install / uninstall events
+
+[Path]
+# No Unit= is set, so a change activates nvidia-cdi-refresh.service.
+# %v is the running kernel release; depmod rewrites both files when kernel
+# modules are added or removed.
+PathChanged=/lib/modules/%v/modules.dep
+PathChanged=/lib/modules/%v/modules.dep.bin
+
+[Install]
+WantedBy=multi-user.target
diff --git a/deployments/systemd/nvidia-cdi-refresh.service b/deployments/systemd/nvidia-cdi-refresh.service
new file mode 100644
index 00000000..dae14867
--- /dev/null
+++ b/deployments/systemd/nvidia-cdi-refresh.service
@@ -0,0 +1,29 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[Unit]
+Description=Refresh NVIDIA CDI specification file
+# Skip the unit when /usr/bin/nvidia-smi is not present.
+ConditionPathExists=/usr/bin/nvidia-smi
+
+[Service]
+Type=oneshot
+# Skip the run (without failing) unless modules.dep lists an nvidia.ko module.
+ExecCondition=/usr/bin/grep -qE '/nvidia.ko' /lib/modules/%v/modules.dep
+ExecStart=/usr/bin/nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml
+# Limit the capability bounding set to the three capabilities listed here.
+CapabilityBoundingSet=CAP_SYS_MODULE CAP_SYS_ADMIN CAP_MKNOD
+
+[Install]
+WantedBy=multi-user.target
diff --git a/docker/Dockerfile.debian b/docker/Dockerfile.debian
index 4b3c535f..7d4d1287 100644
--- a/docker/Dockerfile.debian
+++ b/docker/Dockerfile.debian
@@ -55,6 +55,8 @@ RUN make PREFIX=${DIST_DIR} cmds
 
 WORKDIR $DIST_DIR
 COPY packaging/debian ./debian
+COPY deployments/systemd/ .
+COPY deployments/udev/ .
 
 ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
 ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
diff --git a/docker/Dockerfile.opensuse-leap b/docker/Dockerfile.opensuse-leap
index f1ce31ec..b4f9488e 100644
--- a/docker/Dockerfile.opensuse-leap
+++ b/docker/Dockerfile.opensuse-leap
@@ -46,6 +46,8 @@ RUN make PREFIX=${DIST_DIR} cmds
 
 WORKDIR $DIST_DIR/..
 COPY packaging/rpm .
+COPY deployments/systemd/ ${DIST_DIR}/
+COPY deployments/udev/ ${DIST_DIR}/
 
 ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
 ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
diff --git a/docker/Dockerfile.rpm-yum b/docker/Dockerfile.rpm-yum
index 1a429f58..f9e375b5 100644
--- a/docker/Dockerfile.rpm-yum
+++ b/docker/Dockerfile.rpm-yum
@@ -71,6 +71,8 @@ RUN make PREFIX=${DIST_DIR} cmds
 
 WORKDIR $DIST_DIR/..
 COPY packaging/rpm .
+COPY deployments/systemd/* ${DIST_DIR}/
+COPY deployments/udev/* ${DIST_DIR}/
 
 ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
 ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
diff --git a/docker/Dockerfile.ubuntu b/docker/Dockerfile.ubuntu
index cfa930bb..4d56d62d 100644
--- a/docker/Dockerfile.ubuntu
+++ b/docker/Dockerfile.ubuntu
@@ -53,6 +53,8 @@ RUN make PREFIX=${DIST_DIR} cmds
 
 WORKDIR $DIST_DIR
 COPY packaging/debian ./debian
+COPY deployments/systemd/ .
+COPY deployments/udev/ .
 
 ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
 ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
diff --git a/packaging/debian/control b/packaging/debian/control
index 01abcea4..d203407d 100644
--- a/packaging/debian/control
+++ b/packaging/debian/control
@@ -29,3 +29,9 @@ Architecture: any
 Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@)
 Description: NVIDIA Container Toolkit Operator Extensions
  Provides tools for using the NVIDIA Container Toolkit with the GPU Operator
+
+Package: nvidia-container-toolkit-cdi-refresh
+Architecture: any
+Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@)
+Description: NVIDIA CDI Refresh Service
+ Provides a service to refresh the NVIDIA CDI specification
diff --git a/packaging/debian/nvidia-container-toolkit-cdi-refresh.install b/packaging/debian/nvidia-container-toolkit-cdi-refresh.install
new file mode 100644
index 00000000..2fa353fc
--- /dev/null
+++ b/packaging/debian/nvidia-container-toolkit-cdi-refresh.install
@@ -0,0 +1,2 @@
+nvidia-cdi-refresh.service /etc/systemd/system/
+nvidia-cdi-refresh.path /etc/systemd/system/
diff --git a/packaging/debian/nvidia-container-toolkit-cdi-refresh.postinst b/packaging/debian/nvidia-container-toolkit-cdi-refresh.postinst
new file mode 100755
index 00000000..1521d7f3
--- /dev/null
+++ b/packaging/debian/nvidia-container-toolkit-cdi-refresh.postinst
@@ -0,0 +1,34 @@
+#!/bin/sh
+# postinst: enable the nvidia-cdi-refresh units once the package is configured.
+
+set -e
+
+case "$1" in
+    configure)
+        # /run/systemd/system exists only when systemd is the running init, so
+        # chroots and image builds are skipped while a "degraded" host is not
+        # (unlike `systemctl is-system-running`, which fails in that state).
+        if command -v systemctl >/dev/null 2>&1 \
+           && [ -d /run/systemd/system ]; then
+
+          systemctl daemon-reload || true
+
+          if [ -z "$2" ]; then  # $2 empty → first install
+            # The service is enabled too (it is WantedBy=multi-user.target) so
+            # the spec is generated at boot, not only when watched files change.
+            systemctl enable --now nvidia-cdi-refresh.path nvidia-cdi-refresh.service || true
+          fi
+        fi
+        ;;
+
+    abort-upgrade|abort-remove|abort-deconfigure)
+        # Nothing to do for these dpkg abort cases
+        ;;
+
+    *)
+        echo "postinst called with unknown argument \`$1'" >&2
+        exit 1
+        ;;
+esac
+
+exit 0
diff --git a/packaging/rpm/SPECS/nvidia-container-toolkit.spec b/packaging/rpm/SPECS/nvidia-container-toolkit.spec
index 60552827..f5ffcab7 100644
--- a/packaging/rpm/SPECS/nvidia-container-toolkit.spec
+++ b/packaging/rpm/SPECS/nvidia-container-toolkit.spec
@@ -17,6 +17,8 @@ Source3: nvidia-container-runtime
 Source4: nvidia-container-runtime.cdi
 Source5: nvidia-container-runtime.legacy
 Source6: nvidia-cdi-hook
+Source7: nvidia-cdi-refresh.service
+Source8: nvidia-cdi-refresh.path
 
 Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
 Provides: nvidia-container-runtime
@@ -28,16 +30,21 @@ Requires: nvidia-container-toolkit-base == %{version}-%{release}
 Provides tools and utilities to enable GPU support in containers.
 
 %prep
-cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} .
+cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} %{SOURCE7} %{SOURCE8} .
 
 %install
 mkdir -p %{buildroot}%{_bindir}
+mkdir -p %{buildroot}/etc/systemd/system/
+mkdir -p %{buildroot}/etc/udev/rules.d
+
 install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook
 install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime
 install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi
 install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy
 install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk
 install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook
+install -m 644 -t %{buildroot}/etc/systemd/system %{SOURCE7}
+install -m 644 -t %{buildroot}/etc/systemd/system %{SOURCE8}
 
 %post
 if [ $1 -gt 1 ]; then  # only on package upgrade
@@ -45,6 +52,14 @@ if [ $1 -gt 1 ]; then  # only on package upgrade
   cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
 fi
 
+# Reload systemd unit cache
+/bin/systemctl daemon-reload || :
+
+# On fresh install ($1 == 1) enable both units: path watches driver changes, service runs at boot
+if [ "$1" -eq 1 ]; then
+  /bin/systemctl enable --now nvidia-cdi-refresh.path nvidia-cdi-refresh.service || :
+fi
+
 %posttrans
 if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then
   # repairing lost file nvidia-container-runtime-hook
@@ -64,6 +79,9 @@ fi
 %files
 %license LICENSE
 %{_bindir}/nvidia-container-runtime-hook
+%config /etc/systemd/system/nvidia-cdi-refresh.service
+%config /etc/systemd/system/nvidia-cdi-refresh.path
+%dir /etc/systemd/system
 
 %changelog
 # As of 1.10.0-1 we generate the release information automatically