Compare commits

...

16 Commits

Author SHA1 Message Date
allegroai
5446aed9cf Version bump to v1.2.2 2022-03-30 20:48:28 +03:00
allegroai
b94ec85461 Fix update should run with -y 2022-03-30 20:48:11 +03:00
allegroai
f55f4f7535 Version bump 2022-03-30 20:11:13 +03:00
allegroai
c87da3a079 Fix apt-get update fail causes apt-get install to not be executed 2022-03-30 20:10:57 +03:00
allegroai
c3590a53a8 Fix CLEARML_AGENT_SKIP_PIP_VENV_INSTALL fails to find python executable 2022-03-30 20:10:08 +03:00
allegroai
a4315722ab Version bump to v1.2.1 2022-03-28 18:13:20 +03:00
allegroai
c901bd331c Fix git packages are installed even if commit is given and is preinstalled when using cached virtual environment 2022-03-28 18:11:46 +03:00
allegroai
df97f170a2 Fix clearml-agent init
Use app.clear.ml as default server
Add git token references
2022-03-24 22:08:06 +02:00
allegroai
a30a2dad66 Add git personal token docs 2022-03-24 22:07:15 +02:00
allegroai
2432f5bb68 Add CLEARML_AGENT_PROPAGATE_EXITCODE, set to 1 to let clearml-agent execute return a nonzero exit code on failure (notice by default we keep the return code 0, the exception is the k8s glue with non-restarting Pods, where users would want to get visibility into failing Tasks, do not use unless you know what to expect from k8s) 2022-03-24 22:04:25 +02:00
allegroai
341086d86a Fix vcs packages are reinstalled when same commit version is already installed 2022-03-24 22:03:25 +02:00
allegroai
1163c96438 Add agent.package_manager.force_original_requirements allowing to only use the "org_pip" coming from dev execution (using this prevents editing the installed packages from the UI) 2022-03-24 22:00:33 +02:00
allegroai
4c120d7cd0 Add ability to override container LOCAL_PYTHON, add auto python support (max 3.15) 2022-03-24 21:58:07 +02:00
Jan Stratil
966a9758b8 Add condition to requirements for typing package (python < 3.5) (#103)
- According to the maintainer of the typing package, it is recommended
  to use the typing package with condition for python version since
  for python3.5 and later typing package is useless (as it is in the
  stdlib).
- Typing package can cause some issues so NOT installing it can solve
  some of them.

Co-authored-by: Jan Stratil <jan.stratil@innovatrics.com>
2022-03-23 15:03:37 +02:00
allegroai
f58071fc74 Fix README 2022-03-20 23:24:07 +02:00
allegroai
8712c5e636 Fix PyTorch aarch64 and windows support 2022-03-16 17:40:21 +02:00
14 changed files with 134 additions and 31 deletions

View File

@@ -61,7 +61,7 @@ It is a zero configuration fire-and-forget execution agent, providing a full ML/
We think Kubernetes is awesome, but it should be a choice.
We designed `clearml-agent` so you can run bare-metal or inside a pod with any mix that fits your environment.
Find Dockerfiles in [docker](./docker) dir and a helm Chart in https://github.com/allegroai/clearml-helm-charts
Find Dockerfiles in the [docker](./docker) dir and a helm Chart in https://github.com/allegroai/clearml-helm-charts
#### Benefits of integrating existing K8s with ClearML-Agent
- ClearML-Agent adds the missing scheduling capabilities to K8s
- Allowing for more flexible automation from code

View File

@@ -11,7 +11,11 @@
# Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
# leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
# Notice: GitHub personal token is equivalent to password, you can put it directly into `git_pass`
# **Notice**: GitHub personal token is equivalent to password, you can put it directly into `git_pass`
# To learn how to generate git token GitHub/Bitbucket/GitLab:
# https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token
# https://support.atlassian.com/bitbucket-cloud/docs/app-passwords/
# https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html
# git_user: ""
# git_pass: ""
# git_host: ""

View File

@@ -16,6 +16,7 @@ ENV_DISABLE_VAULT_SUPPORT = EnvEntry('CLEARML_AGENT_DISABLE_VAULT_SUPPORT', type
ENV_ENABLE_ENV_CONFIG_SECTION = EnvEntry('CLEARML_AGENT_ENABLE_ENV_CONFIG_SECTION', type=bool)
ENV_ENABLE_FILES_CONFIG_SECTION = EnvEntry('CLEARML_AGENT_ENABLE_FILES_CONFIG_SECTION', type=bool)
ENV_VENV_CONFIGURED = EnvEntry('VIRTUAL_ENV', type=str)
ENV_PROPAGATE_EXITCODE = EnvEntry("CLEARML_AGENT_PROPAGATE_EXITCODE", type=bool, default=False)
ENV_INITIAL_CONNECT_RETRY_OVERRIDE = EnvEntry(
'CLEARML_AGENT_INITIAL_CONNECT_RETRY_OVERRIDE', default=True, converter=safe_text_to_bool
)

View File

@@ -27,9 +27,9 @@ except Exception:
host_description = """
Editing configuration file: {CONFIG_FILE}
Enter the url of the clearml-server's Web service, for example: {HOST}
Enter the url of the clearml-server's Web service, for example: {HOST} or https://app.clear.ml
""".format(
CONFIG_FILE=LOCAL_CONFIG_FILES[0],
CONFIG_FILE=LOCAL_CONFIG_FILES[-1],
HOST=def_host,
)
@@ -84,7 +84,7 @@ def main():
host = input_url('API Host', api_server)
else:
print(host_description)
host = input_url('WEB Host', '')
host = input_url('WEB Host', 'https://app.clear.ml')
parsed_host = verify_url(host)
api_host, files_host, web_host = parse_host(parsed_host, allow_input=True)
@@ -116,9 +116,15 @@ def main():
print('Enter git username for repository cloning (leave blank for SSH key authentication): [] ', end='')
git_user = input()
if git_user.strip():
print('Enter password for user \'{}\': '.format(git_user), end='')
print(
"Git personal token is equivalent to a password, to learn how to generate a token:\n"
" GitHub: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token\n" # noqa
" Bitbucket: https://support.atlassian.com/bitbucket-cloud/docs/app-passwords/\n"
" GitLab: https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html\n"
)
print('Enter git password token for user \'{}\': '.format(git_user), end='')
git_pass = input()
print('Git repository cloning will be using user={} password={}'.format(git_user, git_pass))
print('Git repository cloning will be using user={} token={}'.format(git_user, git_pass))
else:
git_user = None
git_pass = None

View File

@@ -41,7 +41,7 @@ from clearml_agent.backend_api.services import workers as workers_api
from clearml_agent.backend_api.session import CallResult
from clearml_agent.backend_api.session.defs import (
ENV_ENABLE_ENV_CONFIG_SECTION, ENV_ENABLE_FILES_CONFIG_SECTION,
ENV_VENV_CONFIGURED, )
ENV_VENV_CONFIGURED, ENV_PROPAGATE_EXITCODE, )
from clearml_agent.backend_config.defs import UptimeConf
from clearml_agent.backend_config.utils import apply_environment, apply_files
from clearml_agent.commands.base import resolve_names, ServiceCommandSection
@@ -639,7 +639,7 @@ class Worker(ServiceCommandSection):
pass
def run_one_task(self, queue, task_id, worker_args, docker=None, task_session=None):
# type: (Text, Text, WorkerParams, Optional[Text]) -> ()
# type: (Text, Text, WorkerParams, Optional[Text]) -> int
"""
Run one task pulled from queue.
:param queue: ID of queue that task was pulled from
@@ -647,6 +647,8 @@ class Worker(ServiceCommandSection):
:param worker_args: Worker command line arguments
:param task_session: The session for running operations on the passed task
:param docker: Docker image in which the execution task will run
:return: exit code (0 is success)
"""
# start new process and execute task id
# "Running task '{}'".format(task_id)
@@ -848,6 +850,8 @@ class Worker(ServiceCommandSection):
# unregister this worker, it was killed
self._unregister()
return status
def get_task_session(self, user, company):
"""
Get task session for the user by cloning the agent session
@@ -1878,6 +1882,9 @@ class Worker(ServiceCommandSection):
base_interpreter=package_api.requirements_manager.get_interpreter(),
requirement_substitutions=[OnlyExternalRequirements],
)
# manually update the current state,
# for the external git reference chance (in the replace callback)
package_api.requirements_manager.update_installed_packages_state(package_api.freeze())
# make sure we run the handlers
cached_requirements = \
{k: package_api.requirements_manager.replace(requirements[k] or '')
@@ -2098,7 +2105,7 @@ class Worker(ServiceCommandSection):
)
try:
self.report_monitor(ResourceMonitor.StatusReport(task=current_task.id))
self.run_one_task(queue='', task_id=current_task.id, worker_args=worker_params, docker=docker)
status = self.run_one_task(queue='', task_id=current_task.id, worker_args=worker_params, docker=docker)
finally:
self.stop_monitor()
self._unregister()
@@ -2106,7 +2113,7 @@ class Worker(ServiceCommandSection):
if full_monitoring and self.temp_config_path:
safe_remove_file(self._session.config_file)
Singleton.close_pid_file()
return
return status if ENV_PROPAGATE_EXITCODE.get() else 0
self._apply_extra_configuration()
@@ -2174,8 +2181,22 @@ class Worker(ServiceCommandSection):
if not custom_build_script:
if self._session.config.get("agent.package_manager.force_repo_requirements_txt", False):
requirements = None
print("[package_manager.force_repo_requirements_txt=true] "
"Skipping requirements, using repository \"requirements.txt\" ")
print("\n[package_manager.force_repo_requirements_txt=true] "
"Skipping requirements, using repository \"requirements.txt\" \n")
elif self._session.config.get("agent.package_manager.force_original_requirements", False):
try:
requirements = current_task.script.requirements
if isinstance(requirements, dict):
if 'org_pip' in requirements:
requirements['pip'] = requirements['org_pip']
print("\n[package_manager.force_original_requirements=true] "
"Using original requirements: \n{}\n".format(requirements['org_pip']))
if 'org_conda' in requirements:
requirements['conda'] = requirements['org_conda']
print("\n[package_manager.force_original_requirements=true] "
"Using original requirements: \n{}\n".format(requirements['org_conda']))
except AttributeError:
requirements = None
else:
try:
requirements = current_task.script.requirements
@@ -2226,6 +2247,9 @@ class Worker(ServiceCommandSection):
base_interpreter=package_api.requirements_manager.get_interpreter(),
requirement_substitutions=[OnlyExternalRequirements]
)
# manually update the current state,
# for the external git reference chance (in the replace callback)
package_api.requirements_manager.update_installed_packages_state(package_api.freeze())
# make sure we run the handlers
cached_requirements = \
{k: package_api.requirements_manager.replace(requirements[k] or '')
@@ -2790,7 +2814,7 @@ class Worker(ServiceCommandSection):
if self._session.debug_mode and temp_file:
rm_file(temp_file.name)
# call post installation callback
requirements_manager.post_install(self._session)
requirements_manager.post_install(self._session, package_manager=package_api)
# mark as successful installation
repo_requirements_installed = True
@@ -3175,6 +3199,10 @@ class Worker(ServiceCommandSection):
if standalone_mode:
self.package_api = VirtualenvPip(**package_manager_params)
else:
if not Path(executable_name).is_file():
executable_name_path = find_executable(executable_name)
print("Interpreter '{}' found at '{}'".format(executable_name, executable_name_path))
executable_name = executable_name_path
# we can change it, no one is going to use it anyhow
package_manager_params['path'] = None
package_manager_params['interpreter'] = executable_name
@@ -3609,11 +3637,11 @@ class Worker(ServiceCommandSection):
' libsm6 libxext6 libxrender-dev libglib2.0-0' if install_opencv_libs else ""),
"[ ! -z $(which git) ] || export CLEARML_APT_INSTALL=\"$CLEARML_APT_INSTALL git\"",
"declare LOCAL_PYTHON",
"for i in {{10..5}}; do which {python_single_digit}.$i && " +
"[ ! -z $LOCAL_PYTHON ] || for i in {{15..5}}; do which {python_single_digit}.$i && " +
"{python_single_digit}.$i -m pip --version && " +
"export LOCAL_PYTHON=$(which {python_single_digit}.$i) && break ; done",
"[ ! -z $LOCAL_PYTHON ] || export CLEARML_APT_INSTALL=\"$CLEARML_APT_INSTALL {python_single_digit}-pip\"", # noqa
"[ -z \"$CLEARML_APT_INSTALL\" ] || (apt-get update && apt-get install -y $CLEARML_APT_INSTALL)",
"[ -z \"$CLEARML_APT_INSTALL\" ] || (apt-get update -y ; apt-get install -y $CLEARML_APT_INSTALL)",
]
if preprocess_bash_script:

View File

@@ -69,7 +69,7 @@ class K8sIntegration(Worker):
"apt-get update",
"apt-get install -y git libsm6 libxext6 libxrender-dev libglib2.0-0",
"declare LOCAL_PYTHON",
"for i in {{10..5}}; do which python3.$i && python3.$i -m pip --version && "
"[ ! -z $LOCAL_PYTHON ] || for i in {{15..5}}; do which python3.$i && python3.$i -m pip --version && "
"export LOCAL_PYTHON=$(which python3.$i) && break ; done",
"[ ! -z $LOCAL_PYTHON ] || apt-get install -y python3-pip",
"[ ! -z $LOCAL_PYTHON ] || export LOCAL_PYTHON=python3",

View File

@@ -424,7 +424,7 @@ class CondaAPI(PackageManager):
finally:
PackageManager._selected_manager = self
self.requirements_manager.post_install(self.session)
self.requirements_manager.post_install(self.session, package_manager=self)
def load_requirements(self, requirements):
# if we are in read only mode, do not uninstall anything
@@ -642,7 +642,7 @@ class CondaAPI(PackageManager):
finally:
PackageManager._selected_manager = self
self.requirements_manager.post_install(self.session)
self.requirements_manager.post_install(self.session, package_manager=self)
return True
def _parse_conda_result_bad_packges(self, result_dict):

View File

@@ -46,11 +46,10 @@ class ExternalRequirements(SimpleSubstitution):
post_install_req = self.post_install_req
self.post_install_req = []
for req in post_install_req:
try:
freeze_base = PackageManager.out_of_scope_freeze() or ''
except:
freeze_base = ''
if self.is_already_installed(req):
print("No need to reinstall \'{}\' from VCS, "
"the exact same version is already installed".format(req.name))
continue
req_line = self._add_vcs_credentials(req, session)
# if we have older pip version we have to make sure we replace back the package name with the
@@ -175,5 +174,11 @@ class OnlyExternalRequirements(ExternalRequirements):
# Do not store the skipped requirements
# mark skip package
if super(OnlyExternalRequirements, self).match(req):
if self.is_already_installed(req):
print("No need to reinstall \'{}\' from VCS, "
"the exact same version is already installed".format(req.name))
return Text('')
return self._add_vcs_credentials(req, self._session)
return Text('')

View File

@@ -39,7 +39,7 @@ class VirtualenvPip(SystemPip, PackageManager):
if isinstance(requirements, dict) and requirements.get("pip"):
requirements["pip"] = self.requirements_manager.replace(requirements["pip"])
super(VirtualenvPip, self).load_requirements(requirements)
self.requirements_manager.post_install(self.session)
self.requirements_manager.post_install(self.session, package_manager=self)
def create_flags(self):
"""

View File

@@ -2,6 +2,7 @@ from __future__ import unicode_literals
import re
import sys
import platform
from furl import furl
import urllib.parse
from operator import itemgetter
@@ -245,10 +246,15 @@ class PytorchRequirement(SimpleSubstitution):
return "macos"
raise RuntimeError("unrecognized OS")
@staticmethod
def get_arch():
    """
    Return the machine type reported by ``platform.machine()``, lower-cased.

    :return: lower-case string form of ``platform.machine()``
    """
    machine = platform.machine()
    return str(machine).lower()
def _get_link_from_torch_page(self, req, torch_url):
links_parser = LinksHTMLParser()
links_parser.feed(requests.get(torch_url, timeout=10).text)
platform_wheel = "win" if self.get_platform() == "windows" else self.get_platform()
arch_wheel = self.get_arch()
py_ver = self.python_major_minor_str.replace('.', '')
url = None
last_v = None
@@ -269,8 +275,11 @@ class PytorchRequirement(SimpleSubstitution):
continue
if len(parts) < 3 or not parts[2].endswith(py_ver):
continue
if len(parts) < 5 or platform_wheel not in parts[4]:
if len(parts) < 5 or platform_wheel not in parts[4].lower():
continue
if len(parts) < 5 or arch_wheel not in parts[4].lower():
continue
# yes this is for linux python 2.7 support, this is the only python 2.7 we support...
if py_ver and py_ver[0] == '2' and len(parts) > 3 and not parts[3].endswith('u'):
continue

View File

@@ -179,7 +179,7 @@ class MarkerRequirement(object):
if self.remove_local_file_ref():
# print warning
logging.getLogger(__name__).warning(
'Local file not found [{}], references removed !'.format(line))
'Local file not found [{}], references removed'.format(line))
class SimpleVersion:
@@ -437,6 +437,7 @@ class RequirementSubstitution(object):
self.config = session.config # type: ConfigTree
self.suffix = '.post{config[agent.cuda_version]}.dev{config[agent.cudnn_version]}'.format(config=self.config)
self.package_manager = self.config['agent.package_manager.type']
self._is_already_installed_cb = None
@abstractmethod
def match(self, req): # type: (MarkerRequirement) -> bool
@@ -452,6 +453,20 @@ class RequirementSubstitution(object):
"""
pass
def set_is_already_installed_cb(self, cb):
    """
    Store the callback that ``is_already_installed`` calls with a requirement.

    :param cb: callable taking a single requirement argument; a falsy value
        makes ``is_already_installed`` return False without calling anything
    """
    self._is_already_installed_cb = cb
def is_already_installed(self, req):
    """
    Ask the registered callback whether the given requirement is already installed.

    :param req: requirement object, passed unchanged to the callback
    :return: the callback's return value; False when no callback is registered
        or when the callback raised an exception
    """
    if not self._is_already_installed_cb:
        return False
    # noinspection PyBroadException
    try:
        return self._is_already_installed_cb(req)
    except Exception as ex:
        # best effort: a failing check is reported and treated as "not installed".
        # Exception (not BaseException) is caught so that KeyboardInterrupt / SystemExit
        # still propagate instead of being silently turned into a False result
        print("Warning: Requirements post install callback exception (check if package installed): {}".format(ex))
        return False
def post_scan_add_req(self): # type: () -> Optional[MarkerRequirement]
"""
Allows the RequirementSubstitution to add an extra line/requirements after
@@ -562,6 +577,7 @@ class RequirementsManager(object):
cache_dir=pip_cache_dir.as_posix())
self._base_interpreter = base_interpreter
self._cwd = None
self._installed_parsed_packages = set()
def register(self, cls): # type: (Type[RequirementSubstitution]) -> None
self.handlers.append(cls(self._session))
@@ -619,7 +635,9 @@ class RequirementsManager(object):
return join_lines(result)
def post_install(self, session):
def post_install(self, session, package_manager=None):
if package_manager:
self.update_installed_packages_state(package_manager.freeze())
for h in self.handlers:
try:
h.post_install(session)
@@ -641,6 +659,34 @@ class RequirementsManager(object):
def get_interpreter(self):
return self._base_interpreter
def update_installed_packages_state(self, requirements):
    """
    Refresh the stored list of installed packages and hook the "already installed"
    callback into every registered handler, so that later we can detect
    whether a package is already pre-installed.

    :param requirements: the output of a freeze() call, i.e. dict {'pip': "package==version"};
        when a dict is given only its "pip" entry is parsed, any other value is parsed as-is
    """
    if isinstance(requirements, dict):
        requirements = requirements.get("pip")
    self._installed_parsed_packages = self.parse_requirements_section_to_marker_requirements(
        requirements=requirements, cwd=self._cwd)
    # every handler gets the same bound callback, which reads the state stored above
    for handler in self.handlers:
        handler.set_is_already_installed_cb(self._callback_is_already_installed)
def _callback_is_already_installed(self, req):
for p in (self._installed_parsed_packages or []):
if p.name != req.name:
continue
# if this is version control package, only return true of both installed and requests specify commit ID
if req.vcs:
return p.vcs and req.revision and req.revision == p.revision
if not req.specs and not p.specs:
return True
# return if this is the same version
return req.specs and p.specs and req.compare_version(p, op="==")
return False
@staticmethod
def get_cuda_version(config): # type: (ConfigTree) -> (Text, Text)
# we assume os.environ already updated the config['agent.cuda_version'] & config['agent.cudnn_version']

View File

@@ -1 +1 @@
__version__ = '1.2.0rc6'
__version__ = '1.2.2'

View File

@@ -15,7 +15,11 @@ api {
agent {
# Set GIT user/pass credentials (if user/pass are set, GIT protocol will be set to https)
# leave blank for GIT SSH credentials (set force_git_ssh_protocol=true to force SSH protocol)
# Notice: GitHub personal token is equivalent to password, you can put it directly into `git_pass`
# **Notice**: GitHub personal token is equivalent to password, you can put it directly into `git_pass`
# To learn how to generate git token GitHub/Bitbucket/GitLab:
# https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token
# https://support.atlassian.com/bitbucket-cloud/docs/app-passwords/
# https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html
git_user=""
git_pass=""
# Limit credentials to a single domain, for example: github.com,

View File

@@ -12,6 +12,6 @@ pyjwt>=1.6.4,<2.1.0
PyYAML>=3.12,<5.5.0
requests>=2.20.0,<2.26.0
six>=1.13.0,<1.16.0
typing>=3.6.4,<3.8.0
typing>=3.6.4,<3.8.0 ; python_version < '3.5'
urllib3>=1.21.1,<1.27.0
virtualenv>=16,<21