# diff --git a/test/container/common.sh b/test/container/common.sh
# new file mode 100644
# index 00000000..facfdde4
#! /bin/bash
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Shared helpers for the container tests: logging, retrying, and management of
# the host directory tree (${shared_dir}) that is bind-mounted into containers.

# Directory and file name of the CRI-O hook checked by the crio tests.
readonly CRIO_HOOKS_DIR="/usr/share/containers/oci/hooks.d"
readonly CRIO_HOOK_FILENAME="oci-nvidia-hook.json"

# Record whether stderr (the stream log() writes to) is a terminal.
if [[ -t 2 ]]; then
  readonly LOG_TTY=1
else
  readonly LOG_NO_TTY=1
fi

# Colour escapes are only defined on a terminal reporting at least 15 colours.
# A failing `tput colors` (e.g. TERM unset) is treated as 0 colours rather
# than feeding an empty string to the numeric comparison.
if [[ "${LOG_TTY-0}" -eq 1 ]] \
  && [[ "$(tput colors 2>/dev/null || echo 0)" -ge 15 ]]; then
  # Assign first and mark readonly afterwards so the command substitution is
  # not hidden behind `readonly`; `|| true` keeps a tput failure non-fatal.
  FMT_BOLD="$(tput bold)" || true
  FMT_RED="$(tput setaf 1)" || true
  FMT_YELLOW="$(tput setaf 3)" || true
  FMT_BLUE="$(tput setaf 12)" || true
  FMT_CLEAR="$(tput sgr0)" || true
  readonly FMT_BOLD FMT_RED FMT_YELLOW FMT_BLUE FMT_CLEAR
fi

#######################################
# Write a "[LEVEL] message" line to stderr.
# Globals:   FMT_BLUE, FMT_YELLOW, FMT_RED, FMT_CLEAR (read; may be unset)
# Arguments: $1 - level; INFO, WARN and ERROR get a colour, others do not
#            $@ - message words, joined with spaces
# Outputs:   the formatted line on stderr; backslash escapes in the message
#            are expanded because it is printed with printf's %b
#######################################
log() {
  local -r level="$1"
  shift
  local -r message="$*"

  local fmt_on="${FMT_CLEAR-}"
  local -r fmt_off="${FMT_CLEAR-}"

  case "${level}" in
    INFO) fmt_on="${FMT_BLUE-}" ;;
    WARN) fmt_on="${FMT_YELLOW-}" ;;
    ERROR) fmt_on="${FMT_RED-}" ;;
  esac

  printf "%s[%s]%s %b\n" "${fmt_on}" "${level}" "${fmt_off}" "${message}" >&2
}

#######################################
# Run a command until it succeeds or the attempts are used up.
# Arguments: $1 - maximum number of attempts; 0 or less retries forever
#            $2 - delay handed to sleep(1) between attempts
#            $@ - the command (and its arguments) to run
# Returns:   0 as soon as the command succeeds, 1 once attempts are exhausted
#######################################
with_retry() {
  local -r max_attempts="$1"
  local -r delay="$2"
  shift 2

  local count=0

  while true; do
    # Using the command as an `if` condition keeps a failure from tripping
    # errexit (or an ERR trap) without toggling `set -e`, so the caller's
    # shell options are left exactly as they were.
    if "$@"; then
      return 0
    fi

    count="$((count + 1))"

    if [[ "${max_attempts}" -le 0 ]] || [[ "${count}" -lt "${max_attempts}" ]]; then
      sleep "${delay}"
    else
      return 1
    fi
  done
}

#######################################
# Create the host-side directory tree used by the tests.
# Globals: basedir, shared_dir, CRIO_HOOKS_DIR (read)
# Copies ${basedir}/shared to ${shared_dir} and then creates the directories
# that testing::docker_run::toolkit::shell bind-mounts.
#######################################
testing::setup() {
  cp -Rp "${basedir}/shared" "${shared_dir}"

  local dir
  for dir in \
    "/etc/containerd" \
    "/etc/docker" \
    "/run/docker/containerd" \
    "/run/nvidia" \
    "/usr/local/nvidia" \
    "${CRIO_HOOKS_DIR}"; do
    mkdir -p "${shared_dir}${dir}"
  done
}

#######################################
# Remove ${shared_dir} and run the cleanup function of every test case.
# Globals: CLEANUP    - "false" skips all cleanup (treated as true if unset)
#          shared_dir - directory to delete
#          test_cases - whitespace-separated test case names (may be unset)
# For each name in test_cases, testing::<name>::cleanup is called.
#######################################
testing::cleanup() {
  if [[ "${CLEANUP:-true}" == "false" ]]; then
    echo "Skipping cleanup: CLEANUP=${CLEANUP}"
    return 0
  fi

  if [[ -e "${shared_dir}" ]]; then
    # The contents are deleted from inside a container (presumably because
    # the privileged test containers leave root-owned files behind -- TODO
    # confirm). The extra globs also match dotfiles, which `/work/*` skips
    # and which would make the rmdir below fail.
    docker run --rm \
      -v "${shared_dir}:/work" \
      alpine sh -c 'rm -rf /work/* /work/.[!.]* /work/..?*'
    rmdir "${shared_dir}"
  fi

  if [[ "${test_cases:-""}" == "" ]]; then
    echo "No test cases defined. Skipping test case cleanup"
    return 0
  fi

  local tc
  # test_cases is a whitespace-separated list: word splitting is intended.
  for tc in ${test_cases}; do
    "testing::${tc}::cleanup"
  done
}

#######################################
# Run a shell command line in a privileged toolkit container.
# Globals:   shared_dir, toolkit_container_image, CRIO_HOOKS_DIR (read)
# Arguments: $@ - joined with spaces into the single string given to `sh -c`
# The directories created by testing::setup are mounted at the same paths
# inside the container.
#######################################
testing::docker_run::toolkit::shell() {
  docker run --rm --privileged \
    --entrypoint sh \
    -v "${shared_dir}/etc/containerd:/etc/containerd" \
    -v "${shared_dir}/etc/docker:/etc/docker" \
    -v "${shared_dir}/run/docker/containerd:/run/docker/containerd" \
    -v "${shared_dir}/run/nvidia:/run/nvidia" \
    -v "${shared_dir}/usr/local/nvidia:/usr/local/nvidia" \
    -v "${shared_dir}${CRIO_HOOKS_DIR}:${CRIO_HOOKS_DIR}" \
    "${toolkit_container_image}" "-c" "$*"
}

# diff --git a/test/container/containerd_test.sh b/test/container/containerd_test.sh
# new file mode 100755
# index 00000000..8b2d53ba
#! /bin/bash
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Container names used by the containerd tests, the socket the dind docker
# daemon listens on, and the directory holding the containerd socket/config.
readonly containerd_dind_ctr="container-config-containerd-dind-ctr-name"
readonly containerd_test_ctr="container-config-containerd-test-ctr-name"
readonly containerd_dind_socket="/run/nvidia/docker.sock"
readonly containerd_dind_containerd_dir="/run/docker/containerd"

#######################################
# Start the docker-in-docker container in the background.
# Globals: shared_dir, containerd_dind_ctr, containerd_dind_socket (read)
#######################################
testing::containerd::dind::setup() {
  # Docker creates /etc/docker when starting
  # by default there isn't any config in this directory (even after the daemon starts)
  docker run -d --rm --privileged \
    -v "${shared_dir}/etc/docker:/etc/docker" \
    -v "${shared_dir}/run/nvidia:/run/nvidia" \
    -v "${shared_dir}/usr/local/nvidia:/usr/local/nvidia" \
    -v "${shared_dir}/run/docker/containerd:/run/docker/containerd" \
    --name "${containerd_dind_ctr}" \
    docker:stable-dind -H "unix://${containerd_dind_socket}"
}

#######################################
# Run a shell command line inside the docker-in-docker container.
# Arguments: $@ - joined with spaces into the string given to `sh -c`
#######################################
testing::containerd::dind::exec() {
  docker exec "${containerd_dind_ctr}" sh -c "$*"
}

#######################################
# Run the toolkit container against the dind containerd and check that a
# non-GPU container can be run both before and after.
# Arguments: $1 - config variant; selects etc/containerd/config_<variant>.toml
#######################################
testing::containerd::toolkit::run() {
  local -r version="${1}"

  local -r socket="${containerd_dind_containerd_dir}/containerd.sock"
  local -r config="${containerd_dind_containerd_dir}/containerd.toml"
  local -r image="nvcr.io/nvidia/cuda:11.1-base"
  local -r list_images="ctr --address=${socket} image list -q"
  local -r run_container="ctr --address=${socket} image pull ${image}; \
    ctr --address=${socket} run --rm --runtime=io.containerd.runtime.v1.linux ${image} cuda echo foo"

  # We run ctr image list to ensure that containerd has successfully started in the docker-in-docker container
  with_retry 5 5s testing::containerd::dind::exec "${list_images}"

  # Ensure that we can run some non GPU containers from within dind
  with_retry 3 5s testing::containerd::dind::exec "${run_container}"

  # Share the volumes so that we can edit the config file and point to the new runtime
  # Share the pid so that we can ask containerd to reload its config
  docker run --rm --privileged \
    --volumes-from "${containerd_dind_ctr}" \
    -v "${shared_dir}/etc/containerd/config_${version}.toml:${config}" \
    --pid "container:${containerd_dind_ctr}" \
    -e "RUNTIME=containerd" \
    -e "RUNTIME_ARGS=--config=${config} --socket=${socket}" \
    --name "${containerd_test_ctr}" \
    "${toolkit_container_image}" "/usr/local/nvidia" "--no-daemon"

  # We run ctr image list to ensure that containerd has successfully started in the docker-in-docker container
  with_retry 5 5s testing::containerd::dind::exec "${list_images}"

  # Ensure that we haven't broken non GPU containers
  with_retry 3 5s testing::containerd::dind::exec "${run_container}"
}

#######################################
# This test runs containerd setup and containerd cleanup in succession to
# ensure that the config is restored correctly.
# Arguments: $1 - config variant; selects etc/containerd/config_<variant>.toml
# Returns:   non-zero when setup leaves the config unchanged, when the
#            expected settings are missing, or when an empty input config
#            still has an output file after cleanup
#######################################
testing::containerd::toolkit::test_config() {
  local -r version="${1}"
  local -r socket="${containerd_dind_containerd_dir}/containerd.sock"

  # We run ctr image list to ensure that containerd has successfully started in the docker-in-docker container
  with_retry 5 5s testing::containerd::dind::exec \
    "ctr --address=${socket} image list -q"

  local -r input_config="${shared_dir}/etc/containerd/config_${version}.toml"
  local -r output_config="${shared_dir}/output/config_${version}.toml"
  local output_dir
  output_dir="$(dirname "${output_config}")" || return 1

  # Work on a copy so the input config stays available for comparison.
  mkdir -p "${output_dir}"
  cp -p "${input_config}" "${output_config}"

  docker run --rm --privileged \
    --volumes-from "${containerd_dind_ctr}" \
    -v "${output_dir}:${output_dir}" \
    --name "${containerd_test_ctr}" \
    --entrypoint sh \
    "${toolkit_container_image}" -c "containerd setup \
      --config=${output_config} \
      --socket=${socket} \
      --restart-mode=NONE \
      /usr/local/nvidia/toolkit"

  # As a basic test we check that the config has changed. diff exits 0 only
  # for identical files, which here means that setup did not touch the copy.
  if diff "${input_config}" "${output_config}"; then
    echo "containerd setup did not modify ${output_config}" >&2
    return 1
  fi
  # [0-9] instead of \d: \d is not part of POSIX extended regular expressions.
  grep -q -E '^version = [0-9]' "${output_config}" || return 1
  grep -q -F 'default_runtime_name = "nvidia"' "${output_config}" || return 1

  docker run --rm --privileged \
    --volumes-from "${containerd_dind_ctr}" \
    -v "${output_dir}:${output_dir}" \
    --name "${containerd_test_ctr}" \
    --entrypoint sh \
    "${toolkit_container_image}" -c "containerd cleanup \
      --config=${output_config} \
      --socket=${socket} \
      --restart-mode=NONE \
      /usr/local/nvidia/toolkit"

  if [[ -s "${input_config}" ]]; then
    # Compare the input and output config. These should be the same.
    # Kept best-effort: differences are printed but do not fail the test.
    diff "${input_config}" "${output_config}" || true
  else
    # If the input config is empty, the output should not exist.
    [[ ! -e "${output_config}" ]] || return 1
  fi
}

#######################################
# Entry point of the containerd tests: the config round-trip test for every
# variant against one dind instance, then a full toolkit run per variant,
# each against a fresh dind instance.
#######################################
testing::containerd::main() {
  local version

  testing::containerd::dind::setup
  for version in empty v1 v2; do
    testing::containerd::toolkit::test_config "${version}"
  done
  testing::containerd::cleanup

  for version in empty v1 v2; do
    testing::containerd::dind::setup
    testing::containerd::toolkit::run "${version}"
    testing::containerd::cleanup
  done
}

#######################################
# Kill the dind and test containers; output is discarded and failures (for
# example a container that is not running) are deliberately ignored.
#######################################
testing::containerd::cleanup() {
  docker kill "${containerd_dind_ctr}" &> /dev/null || true
  docker kill "${containerd_test_ctr}" &> /dev/null || true
}

# diff --git a/test/container/crio_test.sh b/test/container/crio_test.sh
# new file mode 100644
# index 00000000..a883debb
#! /bin/bash
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#######################################
# Run `crio setup` in the toolkit container and verify the generated hook.
# Globals: shared_dir, CRIO_HOOKS_DIR, CRIO_HOOK_FILENAME (read)
# Returns: non-zero if the hooks directory is empty, jq fails, .hook.path
#          lacks "/run/nvidia/toolkit/" or .hook.env[0] lacks
#          ":/run/nvidia/toolkit"
#######################################
testing::crio::hook_created() {
  testing::docker_run::toolkit::shell 'crio setup /run/nvidia/toolkit'

  local -r hooks_dir="${shared_dir}${CRIO_HOOKS_DIR}"
  local -r hook_file="${hooks_dir}/${CRIO_HOOK_FILENAME}"

  [[ -n "$(ls -A "${hooks_dir}")" ]] || return 1

  # The jq output is captured and matched in the shell instead of piping it
  # into `grep -q`: the result no longer depends on pipefail (or on grep
  # closing the pipe early), and every check fails the function explicitly.
  local hook_path hook_env
  hook_path="$(jq -r '.hook.path' "${hook_file}")" || return 1
  [[ "${hook_path}" == *"/run/nvidia/toolkit/"* ]] || return 1

  hook_env="$(jq -r '.hook.env[0]' "${hook_file}")" || return 1
  [[ "${hook_env}" == *":/run/nvidia/toolkit"* ]] || return 1
}

#######################################
# Run `crio cleanup` in the toolkit container and verify that the hooks
# directory no longer lists any entries.
#######################################
testing::crio::hook_cleanup() {
  testing::docker_run::toolkit::shell 'crio cleanup'

  [[ -z "$(ls -A "${shared_dir}${CRIO_HOOKS_DIR}")" ]]
}

# Entry point of the crio tests: hook creation followed by hook removal.
testing::crio::main() {
  testing::crio::hook_created
  testing::crio::hook_cleanup
}

# The crio tests start no long-running containers, so there is nothing to do.
testing::crio::cleanup() {
  :
}

# diff --git a/test/container/docker_test.sh b/test/container/docker_test.sh
# new file mode 100755
# index 00000000..6d66adbe
#! /bin/bash
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Container names used by the docker tests and the socket that the
# docker-in-docker daemon is told to listen on.
readonly docker_dind_ctr="container-config-docker-dind-ctr-name"
readonly docker_test_ctr="container-config-docker-test-ctr-name"
readonly docker_dind_socket="/run/nvidia/docker.sock"

#######################################
# Start the docker-in-docker container in the background.
# Globals: shared_dir, docker_dind_ctr, docker_dind_socket (read)
#######################################
testing::docker::dind::setup() {
  # Docker creates /etc/docker when starting
  # by default there isn't any config in this directory (even after the daemon starts)
  docker run -d --rm --privileged \
    -v "${shared_dir}/etc/docker:/etc/docker" \
    -v "${shared_dir}/run/nvidia:/run/nvidia" \
    -v "${shared_dir}/usr/local/nvidia:/usr/local/nvidia" \
    --name "${docker_dind_ctr}" \
    docker:stable-dind -H "unix://${docker_dind_socket}"
}

#######################################
# Run a shell command line inside the docker-in-docker container.
# Arguments: $@ - joined with spaces into the string given to `sh -c`
#######################################
testing::docker::dind::exec() {
  docker exec "${docker_dind_ctr}" sh -c "$*"
}

#######################################
# Start the toolkit container (detached) next to the dind container and
# check that a plain alpine container can still be run inside dind.
# Globals: docker_dind_ctr, docker_test_ctr, docker_dind_socket,
#          toolkit_container_image (read)
#######################################
testing::docker::toolkit::run() {
  # Share the volumes so that we can edit the config file and point to the new runtime
  # Share the pid so that we can ask docker to reload its config
  docker run -d --rm --privileged \
    --volumes-from "${docker_dind_ctr}" \
    --pid "container:${docker_dind_ctr}" \
    -e "RUNTIME_ARGS=--socket ${docker_dind_socket}" \
    --name "${docker_test_ctr}" \
    "${toolkit_container_image}" "/usr/local/nvidia" "--no-daemon"

  # Ensure that we haven't broken non GPU containers
  with_retry 3 5s testing::docker::dind::exec docker run -t alpine echo foo
}

# Entry point of the docker tests: start dind, then run the toolkit check.
testing::docker::main() {
  testing::docker::dind::setup
  testing::docker::toolkit::run
}

#######################################
# Kill the dind and test containers; output is discarded and failures (for
# example a container that is not running) are deliberately ignored.
#######################################
testing::docker::cleanup() {
  docker kill "${docker_dind_ctr}" &> /dev/null || true
  docker kill "${docker_test_ctr}" &> /dev/null || true
}

# diff --git a/test/container/main.sh b/test/container/main.sh
# new file mode 100644
# index 00000000..9b6c91f9
#! /bin/bash
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eEuo pipefail +shopt -s lastpipe + +readonly basedir="$(dirname "$(realpath "$0")")" +source "${basedir}/common.sh" + +source "${basedir}/toolkit_test.sh" +source "${basedir}/docker_test.sh" +source "${basedir}/crio_test.sh" +source "${basedir}/containerd_test.sh" + +: ${CLEANUP:=true} + +usage() { + cat >&2 <