From 8d3cb3439006659fcc67b712afaca8d82beb8531 Mon Sep 17 00:00:00 2001 From: clearml <> Date: Mon, 24 Feb 2025 13:41:32 +0200 Subject: [PATCH] Add default support for dnf i.e. rocky/centos/fedora containers --- .../backend_api/config/default/agent.conf | 3 ++- clearml_agent/commands/config.py | 2 +- clearml_agent/commands/worker.py | 20 +++++++++++-------- clearml_agent/glue/k8s.py | 20 ++++++++++--------- docs/clearml.conf | 5 +++-- 5 files changed, 29 insertions(+), 21 deletions(-) diff --git a/clearml_agent/backend_api/config/default/agent.conf b/clearml_agent/backend_api/config/default/agent.conf index 2af8949..5b4b6a6 100644 --- a/clearml_agent/backend_api/config/default/agent.conf +++ b/clearml_agent/backend_api/config/default/agent.conf @@ -207,7 +207,8 @@ # docker_args_filters: ["^--env$", "^-e$"] # optional shell script to run in docker when started before the experiment is started - # extra_docker_shell_script: ["apt-get install -y bindfs", ] + # notice that even if one command fails the other commands will still execute + # extra_docker_shell_script: ["apt-get install -y bindfs", "dnf install -y bindfs", "yum install -y bindfs"] # Install the required packages for opencv libraries (libsm6 libxext6 libxrender-dev libglib2.0-0), # for backwards compatibility reasons, true as default, diff --git a/clearml_agent/commands/config.py b/clearml_agent/commands/config.py index 0976425..d5e9a61 100644 --- a/clearml_agent/commands/config.py +++ b/clearml_agent/commands/config.py @@ -191,7 +191,7 @@ def main(): 'agent.git_pass=\"{}\"\n' \ '\n'.format(git_user or '', git_pass or '') f.write(git_credentials) - extra_index_str = '# extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]\n' \ + extra_index_str = '# extra_index_url: ["https://clearml.jfrog.io/clearml/api/pypi/public/simple"]\n' \ 'agent.package_manager.extra_index_url= ' \ '[\n{}\n]\n\n'.format("\n".join(map("\"{}\"".format, extra_index_urls))) f.write(extra_index_str) diff --git
a/clearml_agent/commands/worker.py b/clearml_agent/commands/worker.py index 44ec4e2..830b7b8 100644 --- a/clearml_agent/commands/worker.py +++ b/clearml_agent/commands/worker.py @@ -28,7 +28,7 @@ from typing import Text, Optional, Any, Tuple, List, Dict, Mapping, Union from .._vendor import attr from .._vendor import six from .._vendor.pathlib2 import Path -from .._vendor.six.moves.urllib.parse import quote +from .._vendor.six.moves.urllib.parse import quote # noqa from clearml_agent.external.pyhocon import ConfigTree, ConfigFactory from clearml_agent.backend_api.services import auth as auth_api @@ -4616,6 +4616,7 @@ class Worker(ServiceCommandSection): mount_apt_cache = mount_apt_cache or '/var/cache/apt/archives' mount_pip_cache = mount_pip_cache or '/root/.cache/pip' mount_poetry_cache = mount_poetry_cache or '/root/.cache/pypoetry' + mount_git_ro = "{}.git".format(mount_ssh_ro.rstrip("/")) if not standalone_mode: if not bash_script: @@ -4623,18 +4624,21 @@ class Worker(ServiceCommandSection): # python+pip is the requirement to match bash_script = [ "echo 'Binary::apt::APT::Keep-Downloaded-Packages \"true\";' > /etc/apt/apt.conf.d/docker-clean", - "chown -R root /root/.cache/pip", + "chown -R $(whoami) $HOME/.cache/pip", "export DEBIAN_FRONTEND=noninteractive", "export CLEARML_APT_INSTALL=\"$CLEARML_APT_INSTALL{}\"".format( ' libsm6 libxext6 libxrender-dev libglib2.0-0' if install_opencv_libs else ""), "cp -Rf {mount_ssh_ro} -T {mount_ssh}" if host_ssh_cache else "", - "[ ! -z $(which git) ] || export CLEARML_APT_INSTALL=\"$CLEARML_APT_INSTALL git\"", + "cp -Rf {mount_git_ro} -T ~/" if host_git_credentials else "", + "[ ! -z $(which git || command -v git) ] || export CLEARML_APT_INSTALL=\"$CLEARML_APT_INSTALL git\"", "declare LOCAL_PYTHON", - "[ ! -z $LOCAL_PYTHON ] || for i in {{20..5}}; do which {python_single_digit}.$i && " + + "[ ! 
-z $LOCAL_PYTHON ] || for i in {{20..5}}; do (which {python_single_digit}.$i || command -v {python_single_digit}.$i) && " + "{python_single_digit}.$i -m pip --version && " + - "export LOCAL_PYTHON=$(which {python_single_digit}.$i) && break ; done", + "export LOCAL_PYTHON=$(which {python_single_digit}.$i || command -v {python_single_digit}.$i) && break ; done", "[ ! -z $LOCAL_PYTHON ] || export CLEARML_APT_INSTALL=\"$CLEARML_APT_INSTALL {python_single_digit}-pip\"", # noqa - "[ -z \"$CLEARML_APT_INSTALL\" ] || (apt-get update -y ; apt-get install -y $CLEARML_APT_INSTALL)", + "[ -z \"$CLEARML_APT_INSTALL\" ] || " + "(apt-get update -y ; apt-get install -y $CLEARML_APT_INSTALL) || " + "(dnf install -y $CLEARML_APT_INSTALL)", "rm /usr/lib/python3.*/EXTERNALLY-MANAGED", # remove PEP 668 ] @@ -4654,12 +4658,12 @@ class Worker(ServiceCommandSection): python_single_digit=python_version.split('.')[0], python=python_version, pip_version=" ".join(PackageManager.get_pip_versions(wrap='\"')), clearml_agent_wheel=clearml_agent_wheel, - mount_ssh_ro=mount_ssh_ro, mount_ssh=mount_ssh, + mount_ssh_ro=mount_ssh_ro, mount_ssh=mount_ssh, mount_git_ro=mount_git_ro, ) if host_git_credentials: for git_credentials in host_git_credentials: - base_cmd += ['-v', '{}:/root/{}'.format(git_credentials, Path(git_credentials).name)] + base_cmd += ['-v', '{}:{}/{}'.format(git_credentials, mount_git_ro, Path(git_credentials).name)] if docker_bash_setup_script and docker_bash_setup_script.strip('\n '): extra_shell_script = (extra_shell_script or '') + \ diff --git a/clearml_agent/glue/k8s.py b/clearml_agent/glue/k8s.py index dec582f..8e6454a 100644 --- a/clearml_agent/glue/k8s.py +++ b/clearml_agent/glue/k8s.py @@ -56,8 +56,8 @@ class K8sIntegration(Worker): KUBECTL_APPLY_CMD = "kubectl apply --namespace={namespace} -f" BASH_INSTALL_SSH_CMD = [ - "apt-get update", - "apt-get install -y openssh-server", + "(apt-get update -y ; apt-get install -y openssh-server) || " + "(dnf install -y openssh-server)", "mkdir -p
/var/run/sshd", "echo 'root:training' | chpasswd", "echo 'PermitRootLogin yes' >> /etc/ssh/sshd_config", @@ -73,9 +73,10 @@ class K8sIntegration(Worker): _CONTAINER_APT_SCRIPT_SECTION = [ "export DEBIAN_FRONTEND='noninteractive'", "echo 'Binary::apt::APT::Keep-Downloaded-Packages \"true\";' > /etc/apt/apt.conf.d/docker-clean", - "chown -R root /root/.cache/pip", - "apt-get update", - "apt-get install -y git libsm6 libxext6 libxrender-dev libglib2.0-0", + "chown -R $(whoami) $HOME/.cache/pip", + "(apt-get update -y ; apt-get install -y git) || " + "(dnf install -y git)" + # should only be added if docker_install_opencv_libs: # libsm6 libxext6 libxrender-dev libglib2.0-0", ] CONTAINER_BASH_SCRIPT = [ @@ -84,9 +85,10 @@ class K8sIntegration(Worker): for line in _CONTAINER_APT_SCRIPT_SECTION ), "declare LOCAL_PYTHON", - "[ ! -z $LOCAL_PYTHON ] || for i in {{20..5}}; do which python3.$i && python3.$i -m pip --version && " - "export LOCAL_PYTHON=$(which python3.$i) && break ; done", - '[ ! -z "$CLEARML_AGENT_SKIP_CONTAINER_APT" ] || [ ! -z "$LOCAL_PYTHON" ] || apt-get install -y python3-pip', + "[ ! -z $LOCAL_PYTHON ] || for i in {{20..5}}; do (which python3.$i || command -v python3.$i) && python3.$i -m pip --version && " + "export LOCAL_PYTHON=$(which python3.$i || command -v python3.$i) && break ; done", + '[ ! -z "$CLEARML_AGENT_SKIP_CONTAINER_APT" ] || [ ! -z "$LOCAL_PYTHON" ] || ' + 'apt-get install -y python3-pip || dnf install -y python3-pip', "[ ! 
-z $LOCAL_PYTHON ] || export LOCAL_PYTHON=python3", "rm /usr/lib/python3.*/EXTERNALLY-MANAGED", # remove PEP 668 "{extra_bash_init_cmd}", @@ -96,7 +98,7 @@ class K8sIntegration(Worker): ] DEFAULT_POD_NAME_PREFIX = "clearml-id-" - DEFAULT_LIMIT_POD_LABEL = "ai.allegro.agent.serial=pod-{pod_number}" + DEFAULT_LIMIT_POD_LABEL = "ai.clearml.agent.serial=pod-{pod_number}" _edit_hyperparams_version = "2.9" diff --git a/docs/clearml.conf b/docs/clearml.conf index 769b234..b78adfb 100644 --- a/docs/clearml.conf +++ b/docs/clearml.conf @@ -94,7 +94,7 @@ agent { force_upgrade: false, # additional artifact repositories to use when installing python packages - # extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"] + # extra_index_url: ["https://clearml.jfrog.io/clearml/api/pypi/public/simple"] extra_index_url: [] # additional flags to use when calling pip install, example: ["--use-deprecated=legacy-resolver", ] @@ -211,7 +211,8 @@ agent { # protected_docker_extra_args: ["privileged", "security-opt", "network", "ipc"] # optional shell script to run in docker when started before the experiment is started - # extra_docker_shell_script: ["apt-get install -y bindfs", ] + # notice that even if one command fails the other commands will still execute + # extra_docker_shell_script: ["apt-get install -y bindfs", "dnf install -y bindfs", "yum install -y bindfs"] # Install the required packages for opencv libraries (libsm6 libxext6 libxrender-dev libglib2.0-0), # for backwards compatibility reasons, true as default,