Add docker example for running the agent k8s glue as a pod in a k8s cluster

allegroai 2021-08-03 11:23:33 +03:00
parent 6b602889a5
commit 84706ba66d
8 changed files with 641 additions and 0 deletions

docker/k8s-glue/README Normal file

@@ -0,0 +1,8 @@
This folder contains an example Docker image and templates for running the k8s glue as a pod in a k8s cluster.
Please note that ClearML credentials and server addresses should either be filled into the clearml.conf file before
building the glue docker image, or provided in the k8s-glue.yml template.
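As a hedged sketch of the build step (the image name and tag below are taken from the example k8s-glue.yml and are only an assumption; use your own registry/tag and update the `image` field in k8s-glue.yml to match):

    docker build -t allegroai/clearml-agent-k8s:test .
    docker push allegroai/clearml-agent-k8s:test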
To run, you'll need to:
* Create a secret from pod_template.yml: `kubectl create secret generic k8s-glue-pod-template --from-file=pod_template.yml`
* Apply the k8s glue template: `kubectl apply -f k8s-glue.yml`

docker/k8s-glue/Dockerfile Normal file

@@ -0,0 +1,18 @@
FROM ubuntu:18.04
USER root
WORKDIR /root
ENV LC_ALL=en_US.UTF-8
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8
ENV PYTHONIOENCODING=UTF-8
COPY ./entrypoint.sh /root/entrypoint.sh
COPY ./k8s_glue_example.py /root/k8s_glue_example.py
COPY ./setup.sh /root/setup.sh
COPY ./clearml.conf /root/clearml.conf
RUN /root/setup.sh
ENTRYPOINT ["/root/entrypoint.sh"]

docker/k8s-glue/clearml.conf Normal file

@@ -0,0 +1,402 @@
# CLEARML-AGENT configuration file
api {
# Notice: 'host' is the api server (default port 8008), not the web server.
api_server: ""
web_server: ""
files_server: ""
# Override with os environment: CLEARML_API_ACCESS_KEY / CLEARML_API_SECRET_KEY
credentials {"access_key": "", "secret_key": ""}
}
# Set GIT user/pass credentials
# leave blank for GIT SSH credentials
agent.git_user=""
agent.git_pass=""
# extra_index_url: ["https://allegroai.jfrog.io/clearml/api/pypi/public/simple"]
agent.package_manager.extra_index_url= [
]
agent {
# unique name of this worker, if None, created based on hostname:process_id
# Override with os environment: CLEARML_WORKER_ID
# worker_id: "clearml-agent-machine1:gpu0"
worker_id: ""
# worker name, replaces the hostname when creating a unique name for this worker
# Override with os environment: CLEARML_WORKER_NAME
# worker_name: "clearml-agent-machine1"
worker_name: ""
# Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
# leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
# git_user: ""
# git_pass: ""
# git_host: ""
# Force GIT protocol to use SSH regardless of the git url (Assumes GIT user/pass are blank)
force_git_ssh_protocol: false
# Force a specific SSH port when converting http to ssh links (the domain is kept the same)
# force_git_ssh_port: 0
# Force a specific SSH username when converting http to ssh links (the default username is 'git')
# force_git_ssh_user: git
# Set the python version to use when creating the virtual environment and launching the experiment
# Example values: "/usr/bin/python3" or "/usr/local/bin/python3.6"
# The default is the python executing the clearml_agent
python_binary: ""
# ignore any requested python version (Default: False; if a Task was using a
# specific python version and the system supports multiple python versions, the agent will use the requested python version)
# ignore_requested_python_version: true
# select python package manager:
# currently supported pip and conda
# poetry is used if pip selected and repository contains poetry.lock file
package_manager: {
# supported options: pip, conda, poetry
type: pip,
# specify pip version to use (examples "<20", "==19.3.1", "", empty string will install the latest version)
pip_version: "<20.2",
# virtual environment inherits packages from system
system_site_packages: false,
# install with --upgrade
force_upgrade: false,
# additional artifact repositories to use when installing python packages
# extra_index_url: ["https://allegroai.jfrog.io/clearmlai/api/pypi/public/simple"]
# additional conda channels to use when installing with conda package manager
conda_channels: ["pytorch", "conda-forge", "defaults", ]
# If set to true, Task's "installed packages" are ignored,
# and the repository's "requirements.txt" is used instead
# force_repo_requirements_txt: false
# set the priority packages to be installed before the rest of the required packages
# priority_packages: ["cython", "numpy", "setuptools", ]
# set the optional priority packages to be installed before the rest of the required packages,
# In case a package installation fails, the package will be ignored,
# and the virtual environment process will continue
# priority_optional_packages: ["pygobject", ]
# set the post packages to be installed after all the rest of the required packages
# post_packages: ["horovod", ]
# set the optional post packages to be installed after all the rest of the required packages,
# In case a package installation fails, the package will be ignored,
# and the virtual environment process will continue
# post_optional_packages: []
# set to True to support torch nightly build installation,
# notice: torch nightly builds are ephemeral and are deleted from time to time
torch_nightly: false,
},
# target folder for virtual environments builds, created when executing experiment
venvs_dir = ~/.clearml/venvs-builds
# cached virtual environment folder
venvs_cache: {
# maximum number of cached venvs
max_entries: 10
# minimum required free space to allow for cache entry, disable by passing 0 or negative value
free_space_threshold_gb: 2.0
# uncomment to enable virtual environment caching
# path: ~/.clearml/venvs-cache
},
# cached git clone folder
vcs_cache: {
enabled: true,
path: ~/.clearml/vcs-cache
},
# use venv-update in order to accelerate python virtual environment building
# Still in beta, turned off by default
venv_update: {
enabled: false,
},
# cached folder for specific python package download (used for pytorch package caching)
pip_download_cache {
enabled: true,
path: ~/.clearml/pip-download-cache
},
translate_ssh: true,
# reload configuration file every daemon execution
reload_config: false,
# pip cache folder mapped into docker, used for python package caching
docker_pip_cache = ~/.clearml/pip-cache
# apt cache folder mapped into docker, used for ubuntu package caching
docker_apt_cache = ~/.clearml/apt-cache
# optional arguments to pass to docker image
# these are local for this agent and will not be updated in the experiment's docker_cmd section
# extra_docker_arguments: ["--ipc=host", ]
# optional shell script to run in docker when started before the experiment is started
# extra_docker_shell_script: ["apt-get install -y bindfs", ]
# Install the required packages for opencv libraries (libsm6 libxext6 libxrender-dev libglib2.0-0),
# for backwards compatibility reasons, true as default,
# change to false to skip installation and decrease docker spin up time
# docker_install_opencv_libs: true
# optional uptime configuration, make sure to use only one of 'uptime/downtime' and not both.
# If uptime is specified, agent will actively poll (and execute) tasks in the time-spans defined here.
# Outside of the specified time-spans, the agent will be idle.
# Defined using a list of items of the format: "<hours> <days>".
# hours - use values 0-23, single values would count as start hour and end at midnight.
# days - use days in abbreviated format (SUN-SAT)
# use '-' for ranges and ',' to separate singular values.
# for example, to enable the workers every Sunday and Tuesday between 17:00-20:00 set uptime to:
# uptime: ["17-20 SUN,TUE"]
# optional downtime configuration, can be used only when uptime is not used.
# If downtime is specified, agent will be idle in the time-spans defined here.
# Outside of the specified time-spans, the agent will actively poll (and execute) tasks.
# Use the same format as described above for uptime
# downtime: []
# set to true in order to force "docker pull" before running an experiment using a docker image.
# This makes sure the docker image is updated.
docker_force_pull: false
default_docker: {
# default docker image to use when running in docker mode
image: "nvidia/cuda:10.1-cudnn7-runtime-ubuntu18.04"
# optional arguments to pass to docker image
# arguments: ["--ipc=host", ]
}
# set the OS environments based on the Task's Environment section before launching the Task process.
enable_task_env: false
# set the initial bash script to execute at the startup of any docker.
# all lines will be executed regardless of their exit code.
# {python_single_digit} is translated to 'python3' or 'python2' according to requested python version
# docker_init_bash_script = [
# "echo 'Binary::apt::APT::Keep-Downloaded-Packages \"true\";' > /etc/apt/apt.conf.d/docker-clean",
# "chown -R root /root/.cache/pip",
# "apt-get update",
# "apt-get install -y git libsm6 libxext6 libxrender-dev libglib2.0-0",
# "(which {python_single_digit} && {python_single_digit} -m pip --version) || apt-get install -y {python_single_digit}-pip",
# ]
# set the preprocessing bash script to execute at the startup of any docker.
# all lines will be executed regardless of their exit code.
# docker_preprocess_bash_script = [
# "echo \"starting docker\"",
#]
# If False, replace \r with \n and display the full console output
# The default is True: report a single \r line in a sequence of consecutive lines, once per 5 seconds.
# suppress_carriage_return: true
# cuda versions used for solving pytorch wheel packages
# should be detected automatically. Override with os environment CUDA_VERSION / CUDNN_VERSION
# cuda_version: 10.1
# cudnn_version: 7.6
# Hide docker environment variables containing secrets when printing out the docker command by replacing their
# values with "********". Turning this feature on will hide the following environment variables values:
# CLEARML_API_SECRET_KEY, CLEARML_AGENT_GIT_PASS, AWS_SECRET_ACCESS_KEY, AZURE_STORAGE_KEY
# To include more environment variables, add their keys to the "extra_keys" list. E.g. to make sure the value of
# your custom environment variable named MY_SPECIAL_PASSWORD will not show in the logs when included in the
# docker command, set:
# extra_keys: ["MY_SPECIAL_PASSWORD"]
hide_docker_command_env_vars {
enabled: true
extra_keys: []
}
}
sdk {
# ClearML - default SDK configuration
storage {
cache {
# Defaults to system temp folder / cache
default_base_dir: "~/.clearml/cache"
size {
# max_used_bytes = -1
min_free_bytes = 10GB
# cleanup_margin_percent = 5%
}
}
direct_access: [
# Objects matching these patterns are considered to be available for direct access, i.e. they will not be downloaded
# or cached, and any download request will return a direct reference.
# Objects are specified in glob format, available for url and content_type.
{ url: "file://*" } # file-urls are always directly referenced
]
}
metrics {
# History size for debug files per metric/variant. For each metric/variant combination with an attached file
# (e.g. debug image event), file names for the uploaded files will be recycled in such a way that no more than
# X files are stored in the upload destination for each metric/variant combination.
file_history_size: 100
# Max history size for matplotlib imshow files per plot title.
# File names for the uploaded images will be recycled in such a way that no more than
# X images are stored in the upload destination for each matplotlib plot title.
matplotlib_untitled_history_size: 100
# Limit the number of digits after the dot in plot reporting (reducing plot report size)
# plot_max_num_digits: 5
# Settings for generated debug images
images {
format: JPEG
quality: 87
subsampling: 0
}
# Support plot-per-graph fully matching Tensorboard behavior (i.e. if this is set to true, each series should have its own graph)
tensorboard_single_series_per_graph: false
}
network {
metrics {
# Number of threads allocated to uploading files (typically debug images) when transmitting metrics for
# a specific iteration
file_upload_threads: 4
# Warn about upload starvation if no uploads were made in specified period while file-bearing events keep
# being sent for upload
file_upload_starvation_warning_sec: 120
}
iteration {
# Max number of retries when getting frames if the server returned an error (http code 500)
max_retries_on_server_error: 5
# Backoff factor for consecutive retry attempts.
# SDK will wait for {backoff factor} * (2 ^ ({number of total retries} - 1)) between retries.
retry_backoff_factor_sec: 10
}
}
aws {
s3 {
# S3 credentials, used for read/write access by various SDK elements
# default, used for any bucket not specified below
key: ""
secret: ""
region: ""
credentials: [
# specifies key/secret credentials to use when handling s3 urls (read or write)
# {
# bucket: "my-bucket-name"
# key: "my-access-key"
# secret: "my-secret-key"
# },
# {
# # This will apply to all buckets in this host (unless key/value is specifically provided for a given bucket)
# host: "my-minio-host:9000"
# key: "12345678"
# secret: "12345678"
# multipart: false
# secure: false
# }
]
}
boto3 {
pool_connections: 512
max_multipart_concurrency: 16
}
}
google.storage {
# # Default project and credentials file
# # Will be used when no bucket configuration is found
# project: "clearml"
# credentials_json: "/path/to/credentials.json"
# # Specific credentials per bucket and sub directory
# credentials = [
# {
# bucket: "my-bucket"
# subdir: "path/in/bucket" # Not required
# project: "clearml"
# credentials_json: "/path/to/credentials.json"
# },
# ]
}
azure.storage {
# containers: [
# {
# account_name: "clearml"
# account_key: "secret"
# # container_name:
# }
# ]
}
log {
# debugging feature: set this to true to make null log propagate messages to root logger (so they appear in stdout)
null_log_propagate: false
task_log_buffer_capacity: 66
# disable urllib info and lower levels
disable_urllib3_info: true
}
development {
# Development-mode options
# dev task reuse window
task_reuse_time_window_in_hours: 72.0
# Run VCS repository detection asynchronously
vcs_repo_detect_async: true
# Store uncommitted git/hg source code diff in experiment manifest when training in development mode
# This stores "git diff" or "hg diff" into the experiment's "script.requirements.diff" section
store_uncommitted_code_diff: true
# Support stopping an experiment in case it was externally stopped, status was changed or task was reset
support_stopping: true
# Default Task output_uri. if output_uri is not provided to Task.init, default_output_uri will be used instead.
default_output_uri: ""
# Default auto generated requirements optimize for smaller requirements
# If True, analyze the entire repository regardless of the entry point.
# If False, first analyze the entry point script; if it does not reference other local files,
# do not analyze the entire repository.
force_analyze_entire_repo: false
# If set to true, *clearml* update message will not be printed to the console
# this value can be overwritten with os environment variable CLEARML_SUPPRESS_UPDATE_MESSAGE=1
suppress_update_message: false
# If this flag is true (default is false), instead of analyzing the code with Pigar, analyze with `pip freeze`
detect_with_pip_freeze: false
# Development mode worker
worker {
# Status report period in seconds
report_period_sec: 2
# ping to the server - check connectivity
ping_period_sec: 30
# Log all stdout & stderr
log_stdout: true
# compatibility feature, report memory usage for the entire machine
# default (false), report only on the running process and its sub-processes
report_global_mem_used: false
}
}
}
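If you prefer baking the server addresses and credentials into the image (as the README notes) rather than passing them through the k8s-glue.yml environment variables, one hedged option is to append the same HOCON keys that the glue pod writes at runtime; the values below are placeholders:

    echo 'api.api_server: "https://api.example.com"' >> clearml.conf
    echo 'api.web_server: "https://app.example.com"' >> clearml.conf
    echo 'api.files_server: "https://files.example.com"' >> clearml.conf
    echo 'api.credentials.access_key: "YOUR_ACCESS_KEY"' >> clearml.conf
    echo 'api.credentials.secret_key: "YOUR_SECRET_KEY"' >> clearml.conf

Later HOCON definitions override the empty defaults above, mirroring how k8s-glue.yml echoes these keys into ~/clearml.conf at container startup.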

docker/k8s-glue/entrypoint.sh Normal file

@@ -0,0 +1,27 @@
#!/bin/bash -x

export CLEARML_FILES_HOST=${CLEARML_FILES_HOST:-$TRAINS_FILES_HOST}

if [ -z "$CLEARML_FILES_HOST" ]; then
    CLEARML_HOST_IP=${CLEARML_HOST_IP:-${TRAINS_HOST_IP:-$(curl -s https://ifconfig.me/ip)}}
fi

export CLEARML_FILES_HOST=${CLEARML_FILES_HOST:-${TRAINS_FILES_HOST:-"http://$CLEARML_HOST_IP:8081"}}
export CLEARML_WEB_HOST=${CLEARML_WEB_HOST:-${TRAINS_WEB_HOST:-"http://$CLEARML_HOST_IP:8080"}}
export CLEARML_API_HOST=${CLEARML_API_HOST:-${TRAINS_API_HOST:-"http://$CLEARML_HOST_IP:8008"}}

echo $CLEARML_FILES_HOST $CLEARML_WEB_HOST $CLEARML_API_HOST 1>&2

if [ -z "$CLEARML_AGENT_NO_UPDATE" ]; then
    if [ -n "$CLEARML_AGENT_UPDATE_REPO" ]; then
        python3 -m pip install -q -U "$CLEARML_AGENT_UPDATE_REPO"
    else
        python3 -m pip install -q -U "clearml-agent${CLEARML_AGENT_UPDATE_VERSION:-$TRAINS_AGENT_UPDATE_VERSION}"
    fi
fi

QUEUE=${K8S_GLUE_QUEUE:-k8s_glue}
MAX_PODS=${K8S_GLUE_MAX_PODS:-2}
EXTRA_ARGS=${K8S_GLUE_EXTRA_ARGS:-}

python3 k8s_glue_example.py --queue ${QUEUE} --max-pods ${MAX_PODS} ${EXTRA_ARGS}
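As a hedged illustration of the fallback logic above (all values are made up), running the entrypoint with only a host IP and queue settings would resolve the three server addresses from that IP and launch the glue accordingly:

    # Resolves CLEARML_FILES_HOST/CLEARML_WEB_HOST/CLEARML_API_HOST to http://203.0.113.10:8081 / :8080 / :8008
    # and ends up running: python3 k8s_glue_example.py --queue gpu_queue --max-pods 4
    CLEARML_HOST_IP=203.0.113.10 \
    CLEARML_AGENT_NO_UPDATE=1 \
    K8S_GLUE_QUEUE=gpu_queue \
    K8S_GLUE_MAX_PODS=4 \
    ./entrypoint.sh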

docker/k8s-glue/k8s_glue_example.py Normal file

@@ -0,0 +1,94 @@
"""
This example assumes you have preconfigured services with selectors in the form of
"ai.allegro.agent.serial=pod-<number>" and a targetPort of 10022.
The K8sIntegration component will label each pod accordingly.
"""
from argparse import ArgumentParser
from clearml_agent.glue.k8s import K8sIntegration
def parse_args():
parser = ArgumentParser()
group = parser.add_mutually_exclusive_group()
parser.add_argument(
"--queue", type=str, help="Queue to pull tasks from"
)
group.add_argument(
"--ports-mode", action='store_true', default=False,
help="Ports-Mode will add a label to the pod which can be used as service, in order to expose ports"
"Should not be used with max-pods"
)
parser.add_argument(
"--num-of-services", type=int, default=20,
help="Specify the number of k8s services to be used. Use only with ports-mode."
)
parser.add_argument(
"--base-port", type=int,
help="Used in conjunction with ports-mode, specifies the base port exposed by the services. "
"For pod #X, the port will be <base-port>+X. Note that pod number is calculated based on base-pod-num"
"e.g. if base-port=20000 and base-pod-num=3, the port for the first pod will be 20003"
)
parser.add_argument(
"--base-pod-num", type=int, default=1,
help="Used in conjunction with ports-mode and base-port, specifies the base pod number to be used by the "
"service (default: %(default)s)"
)
parser.add_argument(
"--gateway-address", type=str, default=None,
help="Used in conjunction with ports-mode, specify the external address of the k8s ingress / ELB"
)
parser.add_argument(
"--pod-clearml-conf", type=str,
help="Configuration file to be used by the pod itself (if not provided, current configuration is used)"
)
parser.add_argument(
"--overrides-yaml", type=str,
help="YAML file containing pod overrides to be used when launching a new pod"
)
parser.add_argument(
"--template-yaml", type=str,
help="YAML file containing pod template. If provided pod will be scheduled with kubectl apply "
"and overrides are ignored, otherwise it will be scheduled with kubectl run"
)
parser.add_argument(
"--ssh-server-port", type=int, default=0,
help="If non-zero, every pod will also start an SSH server on the selected port (default: zero, not active)"
)
parser.add_argument(
"--namespace", type=str,
help="Specify the namespace in which pods will be created (default: %(default)s)", default="clearml"
)
group.add_argument(
"--max-pods", type=int,
help="Limit the maximum number of pods that this service can run at the same time."
"Should not be used with ports-mode"
)
return parser.parse_args()
def main():
args = parse_args()
user_props_cb = None
if args.ports_mode and args.base_port:
def k8s_user_props_cb(pod_number=0):
user_prop = {"k8s-pod-port": args.base_port + pod_number}
if args.gateway_address:
user_prop["k8s-gateway-address"] = args.gateway_address
return user_prop
user_props_cb = k8s_user_props_cb
k8s = K8sIntegration(
ports_mode=args.ports_mode, num_of_services=args.num_of_services, base_pod_num=args.base_pod_num,
user_props_cb=user_props_cb, overrides_yaml=args.overrides_yaml, clearml_conf_file=args.pod_clearml_conf,
template_yaml=args.template_yaml, extra_bash_init_script=K8sIntegration.get_ssh_server_bash(
ssh_port_number=args.ssh_server_port) if args.ssh_server_port else None,
namespace=args.namespace, max_pods_limit=args.max_pods or None,
)
k8s.k8s_daemon(args.queue)
if __name__ == "__main__":
main()
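A hedged invocation sketch for ports-mode (all values are illustrative; note that --ports-mode and --max-pods belong to a mutually exclusive group and cannot be combined):

    python3 k8s_glue_example.py --queue k8s_glue --namespace clearml \
        --ports-mode --num-of-services 10 --base-port 20000 --base-pod-num 1 \
        --gateway-address 198.51.100.7 \
        --template-yaml /root/template/pod_template.yml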

docker/k8s-glue/setup.sh Normal file

@@ -0,0 +1,25 @@
#!/bin/bash
chmod +x /root/entrypoint.sh
apt-get update -y
apt-get dist-upgrade -y
apt-get install -y curl unzip less locales
locale-gen en_US.UTF-8
apt-get install -y curl python3-pip git
python3 -m pip install -U pip
python3 -m pip install clearml-agent
python3 -m pip install -U "cryptography>=2.9"
curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip"
unzip awscliv2.zip
./aws/install
curl -o kubectl https://amazon-eks.s3.us-west-2.amazonaws.com/1.19.6/2021-01-05/bin/linux/amd64/kubectl
chmod +x ./kubectl && mkdir -p $HOME/bin && cp ./kubectl $HOME/bin/kubectl && export PATH=$PATH:$HOME/bin
curl -o aws-iam-authenticator https://amazon-eks.s3.us-west-2.amazonaws.com/1.19.6/2021-01-05/bin/linux/amd64/aws-iam-authenticator
chmod +x ./aws-iam-authenticator && mkdir -p $HOME/bin && cp ./aws-iam-authenticator $HOME/bin/aws-iam-authenticator && export PATH=$PATH:$HOME/bin
echo 'export PATH=$PATH:$HOME/bin' >> ~/.bashrc

docker/k8s-glue/k8s-glue.yml Normal file

@@ -0,0 +1,54 @@
apiVersion: v1
kind: Pod
metadata:
  name: k8s-glue
spec:
  containers:
  - name: k8s-glue-container
    image: allegroai/clearml-agent-k8s:test
    imagePullPolicy: Always
    command: [
      "/bin/bash",
      "-c",
      "echo \"api.credentials.access_key: $CLEARML_API_ACCESS_KEY\" >> ~/clearml.conf \
      && echo \"api.credentials.secret_key: $CLEARML_API_SECRET_KEY\" >> ~/clearml.conf \
      && echo \"api.api_server: $CLEARML_API_HOST\" >> ~/clearml.conf \
      && echo \"api.web_server: $CLEARML_WEB_HOST\" >> ~/clearml.conf \
      && echo \"api.files_server: $CLEARML_FILES_HOST\" >> ~/clearml.conf \
      && source /root/.bashrc \
      && export PATH=$PATH:$HOME/bin \
      && /root/entrypoint.sh
      "
    ]
    volumeMounts:
    - name: pod-template
      mountPath: /root/template
    env:
    - name: CLEARML_API_HOST
      value: ""
    - name: CLEARML_WEB_HOST
      value: ""
    - name: CLEARML_FILES_HOST
      value: ""
    # - name: K8S_GLUE_MAX_PODS
    #   value: "2"
    - name: K8S_GLUE_QUEUE
      value: "k8s-glue"
    - name: K8S_GLUE_EXTRA_ARGS
      value: "--template-yaml /root/template/pod_template.yml"
    - name: CLEARML_API_ACCESS_KEY
      value: ""
    - name: CLEARML_API_SECRET_KEY
      value: ""
    - name: CLEARML_WORKER_ID
      value: "k8s-glue-agent"
    - name: CLEARML_AGENT_UPDATE_REPO
      value: ""
    - name: FORCE_CLEARML_AGENT_REPO
      value: ""
    - name: CLEARML_DOCKER_IMAGE
      value: "ubuntu:18.04"
  volumes:
  - name: pod-template
    secret:
      secretName: k8s-glue-pod-template
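Assuming the empty credential and server address values above have been filled in (or baked into clearml.conf before building the image), a typical deployment and verification sequence might look like this (the status/log checks are standard kubectl usage, not part of this example):

    kubectl create secret generic k8s-glue-pod-template --from-file=pod_template.yml
    kubectl apply -f k8s-glue.yml
    kubectl get pod k8s-glue
    kubectl logs -f k8s-glue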

docker/k8s-glue/pod_template.yml Normal file

@@ -0,0 +1,13 @@
apiVersion: v1
metadata:
  namespace: clearml
spec:
  containers:
  - resources:
      limits:
        cpu: 1000m
        memory: 4G
      requests:
        cpu: 1000m
        memory: 4G
  restartPolicy: Never
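If you change this template (for example, adjusting the CPU/memory requests shown above), the secret and the glue pod need to be recreated for the change to take effect; a hedged sketch:

    kubectl delete secret k8s-glue-pod-template
    kubectl create secret generic k8s-glue-pod-template --from-file=pod_template.yml
    kubectl delete pod k8s-glue
    kubectl apply -f k8s-glue.yml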