From cc656e296973fde56378661bc0e26967ee782bab Mon Sep 17 00:00:00 2001
From: mads-oestergaard <104391876+mads-oestergaard@users.noreply.github.com>
Date: Wed, 27 Nov 2024 12:44:55 +0100
Subject: [PATCH] Add support for uv as package manager (#218)

* add uv as a package manager

* update configs

* update worker and defs

* update environ

* Update configs to highlight sync command

* rename to sync_extra_args and set UV_CACHE_DIR
---
 .../backend_api/config/default/agent.conf    |  12 +-
 clearml_agent/commands/worker.py              |  33 ++-
 clearml_agent/definitions.py                  |   1 +
 clearml_agent/helper/package/uv_api.py        | 227 ++++++++++++++++++
 clearml_agent/session.py                      |   1 +
 docker/k8s-glue/build-resources/clearml.conf  |   3 +-
 docs/clearml.conf                             |   6 +-
 7 files changed, 278 insertions(+), 5 deletions(-)
 create mode 100644 clearml_agent/helper/package/uv_api.py

diff --git a/clearml_agent/backend_api/config/default/agent.conf b/clearml_agent/backend_api/config/default/agent.conf
index 72914c6..13f5c90 100644
--- a/clearml_agent/backend_api/config/default/agent.conf
+++ b/clearml_agent/backend_api/config/default/agent.conf
@@ -54,15 +54,17 @@
     # docker_use_activated_venv: true

     # select python package manager:
-    # currently supported: pip, conda and poetry
+    # currently supported: pip, conda, uv and poetry
     # if "pip" or "conda" are used, the agent installs the required packages
     # based on the "installed packages" section of the Task. If the "installed packages" is empty,
     # it will revert to using `requirements.txt` from the repository's root directory.
     # If Poetry is selected and the root repository contains `poetry.lock` or `pyproject.toml`,
     # the "installed packages" section is ignored, and poetry is used.
     # If Poetry is selected and no lock file is found, it reverts to "pip" package manager behaviour.
+    # If uv is selected and the root repository contains `uv.lock` or `pyproject.toml`,
+    # the "installed packages" section is ignored, and uv is used.
     package_manager: {
-        # supported options: pip, conda, poetry
+        # supported options: pip, conda, poetry, uv
         type: pip,

         # specify pip version to use (examples "<20.2", "==19.3.1", "", empty string will install the latest version)
@@ -70,6 +72,8 @@
         # specify poetry version to use (examples "<2", "==1.1.1", "", empty string will install the latest version)
         # poetry_version: "<2",
         # poetry_install_extra_args: ["-v"]
+        # uv_version: ">0.4",
+        # uv_sync_extra_args: ["--all-extras"]

         # virtual environment inherits packages from system
         system_site_packages: false,
@@ -133,6 +137,10 @@
         # if set to true, the agent will look for the "poetry.lock" file
         # in the passed current working directory instead of the repository's root directory.
         poetry_files_from_repo_working_dir: false
+
+        # if set to true, the agent will look for the "uv.lock" file
+        # in the passed current working directory instead of the repository's root directory.
+        uv_files_from_repo_working_dir: false
     },

     # target folder for virtual environments builds, created when executing experiment
diff --git a/clearml_agent/commands/worker.py b/clearml_agent/commands/worker.py
index 514bbc5..855131b 100644
--- a/clearml_agent/commands/worker.py
+++ b/clearml_agent/commands/worker.py
@@ -122,6 +122,7 @@ from clearml_agent.helper.package.external_req import ExternalRequirements, Only
 from clearml_agent.helper.package.pip_api.system import SystemPip
 from clearml_agent.helper.package.pip_api.venv import VirtualenvPip
 from clearml_agent.helper.package.poetry_api import PoetryConfig, PoetryAPI
+from clearml_agent.helper.package.uv_api import UvConfig, UvAPI
 from clearml_agent.helper.package.post_req import PostRequirement
 from clearml_agent.helper.package.priority_req import PriorityPackageRequirement, PackageCollectorRequirement, \
     CachedPackageRequirement
@@ -756,6 +757,7 @@ class Worker(ServiceCommandSection):

         self.is_venv_update = self._session.config.agent.venv_update.enabled
         self.poetry = PoetryConfig(self._session)
+        self.uv = UvConfig(self._session)
         self.docker_image_func = None
         self._patch_docker_cmd_func = None
         self._docker_image = None
@@ -3017,7 +3019,7 @@ class Worker(ServiceCommandSection):
                 ENV_TASK_EXECUTE_AS_USER.get())
             use_execv = False
         else:
-            use_execv = is_linux_platform() and not isinstance(self.package_api, (PoetryAPI, CondaAPI))
+            use_execv = is_linux_platform() and not isinstance(self.package_api, (PoetryAPI, UvAPI, CondaAPI))

         self._session.api_client.tasks.started(
             task=current_task.id,
@@ -3434,6 +3436,32 @@ class Worker(ServiceCommandSection):
         except Exception as ex:
             self.log.error("failed installing poetry requirements: {}".format(ex))
         return None
+
+    def _install_uv_requirements(self, repo_info, working_dir=None):
+        # type: (Optional[RepoInfo], Optional[str]) -> Optional[UvAPI]
+        """Install the repository requirements with uv; return the UvAPI used, or None if uv was not used."""
+        if not repo_info:
+            return None
+
+        files_from_working_dir = self._session.config.get(
+            "agent.package_manager.uv_files_from_repo_working_dir", False)
+        lockfile_path = Path(repo_info.root) / ((working_dir or "") if files_from_working_dir else "")
+
+        try:
+            if not self.uv.enabled:
+                return None
+
+            self.uv.initialize(cwd=lockfile_path)
+            api = self.uv.get_api(lockfile_path)
+            if api.enabled:
+                print('UV Enabled: Ignoring requested python packages, using repository uv lock file!')
+                api.install()
+                return api
+
+            print(f"Could not find pyproject.toml or uv.lock file in {lockfile_path} \n")
+        except Exception as ex:
+            self.log.error("failed installing uv requirements: {}".format(ex))
+        return None

     def install_requirements(
         self, execution, repo_info, requirements_manager, cached_requirements=None, cwd=None, package_api=None
@@ -3463,6 +3491,9 @@ class Worker(ServiceCommandSection):
             package_api.cwd = cwd

         api = self._install_poetry_requirements(repo_info, execution.working_dir)
+        if not api:
+            # try uv only when poetry did not install, so a returned PoetryAPI is not discarded
+            api = self._install_uv_requirements(repo_info, execution.working_dir)
         if api:
             # update back the package manager, this hack should be fixed
             if package_api == self.package_api:
diff --git a/clearml_agent/definitions.py b/clearml_agent/definitions.py
index b99a95c..23c66ff 100644
--- a/clearml_agent/definitions.py
+++ b/clearml_agent/definitions.py
@@ -161,6 +161,7 @@ ENV_AGENT_SKIP_PYTHON_ENV_INSTALL = EnvironmentConfig("CLEARML_AGENT_SKIP_PYTHON
 ENV_AGENT_FORCE_CODE_DIR = EnvironmentConfig("CLEARML_AGENT_FORCE_CODE_DIR")
 ENV_AGENT_FORCE_EXEC_SCRIPT = EnvironmentConfig("CLEARML_AGENT_FORCE_EXEC_SCRIPT")
 ENV_AGENT_FORCE_POETRY = EnvironmentConfig("CLEARML_AGENT_FORCE_POETRY", type=bool)
+ENV_AGENT_FORCE_UV = EnvironmentConfig("CLEARML_AGENT_FORCE_UV", type=bool)
 ENV_AGENT_FORCE_TASK_INIT = EnvironmentConfig("CLEARML_AGENT_FORCE_TASK_INIT", type=bool)
 ENV_DOCKER_SKIP_GPUS_FLAG = EnvironmentConfig("CLEARML_DOCKER_SKIP_GPUS_FLAG", "TRAINS_DOCKER_SKIP_GPUS_FLAG")
 ENV_AGENT_GIT_USER = EnvironmentConfig("CLEARML_AGENT_GIT_USER", "TRAINS_AGENT_GIT_USER")
diff --git a/clearml_agent/helper/package/uv_api.py b/clearml_agent/helper/package/uv_api.py
new file mode 100644
index 0000000..55576a2
--- /dev/null
+++ b/clearml_agent/helper/package/uv_api.py
@@ -0,0 +1,227 @@
+from copy import deepcopy
+from functools import wraps
+
+import attr
+import sys
+import os
+from pathlib2 import Path
+
+from clearml_agent.definitions import ENV_AGENT_FORCE_UV
+from clearml_agent.helper.process import Argv, DEVNULL, check_if_command_exists
+from clearml_agent.session import Session, UV
+
+
+def prop_guard(prop, log_prop=None):
+    assert isinstance(prop, property)
+    assert not log_prop or isinstance(log_prop, property)
+
+    def decorator(func):
+        message = "%s:%s calling {}, {} = %s".format(func.__name__, prop.fget.__name__)
+
+        @wraps(func)
+        def new_func(self, *args, **kwargs):
+            prop_value = prop.fget(self)
+            if log_prop:
+                log_prop.fget(self).debug(
+                    message,
+                    type(self).__name__,
+                    "" if prop_value else " not",
+                    prop_value,
+                )
+            if prop_value:
+                return func(self, *args, **kwargs)
+
+        return new_func
+
+    return decorator
+
+
+class UvConfig:
+    def __init__(self, session):
+        # type: (Session) -> None
+        self.session = session
+        self._log = session.get_logger(__name__)
+        self._python = (
+            sys.executable
+        )  # default, overwritten from session config in initialize()
+        self._initialized = False
+
+    @property
+    def log(self):
+        return self._log
+
+    @property
+    def enabled(self):
+        return (
+            ENV_AGENT_FORCE_UV.get()
+            or self.session.config["agent.package_manager.type"] == UV
+        )
+
+    _guard_enabled = prop_guard(enabled, log)
+
+    def run(self, *args, **kwargs):
+        """Run `uv <args>` and return the result of `func(argv, **kwargs)`."""
+        func = kwargs.pop("func", Argv.get_output)
+        kwargs.setdefault("stdin", DEVNULL)
+        kwargs["env"] = deepcopy(os.environ)
+        if "VIRTUAL_ENV" in kwargs["env"] or "CONDA_PREFIX" in kwargs["env"]:
+            kwargs["env"].pop("VIRTUAL_ENV", None)
+            kwargs["env"].pop("CONDA_PREFIX", None)
+            kwargs["env"].pop("PYTHONPATH", None)
+            if hasattr(sys, "real_prefix") and hasattr(sys, "base_prefix"):
+                path = ":" + kwargs["env"]["PATH"]
+                path = path.replace(":" + sys.base_prefix, ":" + sys.real_prefix, 1)
+                kwargs["env"]["PATH"] = path
+
+        if self.session and self.session.config and args and args[0] == "sync":
+            # Set the cache dir to venvs dir
+            cache_dir = self.session.config.get("agent.venvs_dir", None)
+            if cache_dir is not None:
+                # kwargs["env"] was copied from os.environ above, so set the variable there
+                # as well, otherwise the first `uv sync` would run without the cache dir
+                os.environ["UV_CACHE_DIR"] = kwargs["env"]["UV_CACHE_DIR"] = cache_dir
+
+            extra_args = self.session.config.get(
+                "agent.package_manager.uv_sync_extra_args", None
+            )
+            if extra_args:
+                args = args + tuple(extra_args)
+
+        if check_if_command_exists("uv"):
+            argv = Argv("uv", *args)
+        else:
+            argv = Argv(self._python, "-m", "uv", *args)
+        self.log.debug("running: %s", argv)
+        return func(argv, **kwargs)
+
+    @_guard_enabled
+    def initialize(self, cwd=None):
+        if not self._initialized:
+            # use correct python version -- detected in Worker.install_virtualenv() and written to
+            # session
+            if self.session.config.get("agent.python_binary", None):
+                self._python = self.session.config.get("agent.python_binary")
+
+            if (
+                self.session.config.get("agent.package_manager.uv_version", None)
+                is not None
+            ):
+                version = str(
+                    self.session.config.get("agent.package_manager.uv_version")
+                )
+
+                # get uv version
+                version = version.replace(" ", "")
+                if (
+                    ("=" in version)
+                    or ("~" in version)
+                    or ("<" in version)
+                    or (">" in version)
+                ):
+                    version = version
+                elif version:
+                    version = "==" + version
+                # (we are not running it yet)
+                argv = Argv(
+                    self._python,
+                    "-m",
+                    "pip",
+                    "install",
+                    "uv{}".format(version),
+                    "--upgrade",
+                    "--disable-pip-version-check",
+                )
+                # this is just for beauty and checks, we already set the version in the Argv
+                if not version:
+                    version = "latest"
+            else:
+                # mark to install uv if not already installed (we are not running it yet)
+                argv = Argv(
+                    self._python,
+                    "-m",
+                    "pip",
+                    "install",
+                    "uv",
+                    "--disable-pip-version-check",
+                )
+                version = ""
+
+            # first upgrade pip if we need to
+            try:
+                from clearml_agent.helper.package.pip_api.venv import VirtualenvPip
+
+                pip = VirtualenvPip(
+                    session=self.session,
+                    python=self._python,
+                    requirements_manager=None,
+                    path=None,
+                    interpreter=self._python,
+                )
+                pip.upgrade_pip()
+            except Exception as ex:
+                self.log.warning("failed upgrading pip: {}".format(ex))
+
+            # check if we do not have a specific version and uv is found skip installation
+            if not version and check_if_command_exists("uv"):
+                print(
+                    "Notice: uv was found, no specific version required, skipping uv installation"
+                )
+            else:
+                print("Installing / Upgrading uv package to {}".format(version))
+                # now install uv
+                try:
+                    print(argv.get_output())
+                except Exception as ex:
+                    self.log.warning("failed installing uv: {}".format(ex))
+
+            # all done.
+            self._initialized = True
+
+    def get_api(self, path):
+        # type: (Path) -> UvAPI
+        return UvAPI(self, path)
+
+
+@attr.s
+class UvAPI(object):
+    config = attr.ib(type=UvConfig)
+    path = attr.ib(type=Path, converter=Path)
+
+    INDICATOR_FILES = "pyproject.toml", "uv.lock"
+
+    def install(self):
+        # type: () -> bool
+        if self.enabled:
+            self.config.run("sync", "--locked", cwd=str(self.path), func=Argv.check_call)
+            return True
+        return False
+
+    @property
+    def enabled(self):
+        return self.config.enabled and (
+            any((self.path / indicator).exists() for indicator in self.INDICATOR_FILES)
+        )
+
+    def freeze(self, freeze_full_environment=False):
+        # `uv pip show` requires explicit package names; `uv pip freeze` lists the
+        # installed packages in requirements format, one requirement per line
+        lines = self.config.run("pip", "freeze", cwd=str(self.path)).splitlines()
+        return {"pip": [line.strip() for line in lines if line.strip()]}
+
+    def get_python_command(self, extra):
+        if check_if_command_exists("uv"):
+            return Argv("uv", "run", "python", *extra)
+        else:
+            return Argv(self.config._python, "-m", "uv", "run", "python", *extra)
+
+    def upgrade_pip(self, *args, **kwargs):
+        pass
+
+    def set_selected_package_manager(self, *args, **kwargs):
+        pass
+
+    def out_of_scope_install_package(self, *args, **kwargs):
+        pass
+
+    def install_from_file(self, *args, **kwargs):
+        pass
diff --git a/clearml_agent/session.py b/clearml_agent/session.py
index 7962cd5..f9ceb18 100644
--- a/clearml_agent/session.py
+++ b/clearml_agent/session.py
@@ -24,6 +24,7 @@ from clearml_agent.helper.docker_args import DockerArgsSanitizer, sanitize_urls
 from .version import __version__

 POETRY = "poetry"
+UV = "uv"


 @attr.s
diff --git a/docker/k8s-glue/build-resources/clearml.conf b/docker/k8s-glue/build-resources/clearml.conf
index d740ce9..18c38cc 100644
--- a/docker/k8s-glue/build-resources/clearml.conf
+++ b/docker/k8s-glue/build-resources/clearml.conf
@@ -53,8 +53,9 @@ agent {
     # select python package manager:
     # currently supported pip and conda
     # poetry is used if pip selected and repository contains poetry.lock file
+    # uv is used if uv selected and repository contains uv.lock or pyproject.toml file
     package_manager: {
-        # supported options: pip, conda, poetry
+        # supported options: pip, conda, poetry, uv
         type: pip,

         # specify pip version to use (examples "<20.2", "==19.3.1", "", empty string will install the latest version)
diff --git a/docs/clearml.conf b/docs/clearml.conf
index 1ba3bcf..769b234 100644
--- a/docs/clearml.conf
+++ b/docs/clearml.conf
@@ -74,8 +74,10 @@ agent {
     # If Poetry is selected and the root repository contains `poetry.lock` or `pyproject.toml`,
     # the "installed packages" section is ignored, and poetry is used.
     # If Poetry is selected and no lock file is found, it reverts to "pip" package manager behaviour.
+    # If uv is selected and the root repository contains `uv.lock` or `pyproject.toml`,
+    # the "installed packages" section is ignored, and uv is used.
     package_manager: {
-        # supported options: pip, conda, poetry
+        # supported options: pip, conda, poetry, uv
         type: pip,

         # specify pip version to use (examples "<20.2", "==19.3.1", "", empty string will install the latest version)
@@ -83,6 +85,8 @@ agent {
         # specify poetry version to use (examples "<2", "==1.1.1", "", empty string will install the latest version)
         # poetry_version: "<2",
         # poetry_install_extra_args: ["-v"]
+        # uv_version: ">0.4",
+        # uv_sync_extra_args: ["--all-extras"]

         # virtual environment inheres packages from system
         system_site_packages: false,