mirror of
https://github.com/clearml/clearml-agent
synced 2025-06-26 18:16:15 +00:00
Compare commits
41 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
afec38a50e | ||
|
|
f9c60904f4 | ||
|
|
a09dc85c67 | ||
|
|
5d74f4b376 | ||
|
|
d558c66d3c | ||
|
|
714c6a05d0 | ||
|
|
43b2f7f41d | ||
|
|
28d752d568 | ||
|
|
6d091d8e08 | ||
|
|
5c6b3ccc94 | ||
|
|
df10e6ed46 | ||
|
|
8ef78fd058 | ||
|
|
640c83288a | ||
|
|
788c79a66f | ||
|
|
bef87c7744 | ||
|
|
f139891276 | ||
|
|
2afaff1713 | ||
|
|
a57a5b151c | ||
|
|
97f446d523 | ||
|
|
a88262c097 | ||
|
|
284271c654 | ||
|
|
ae2775f7b8 | ||
|
|
eb012f5c24 | ||
|
|
06897f7606 | ||
|
|
599219b02d | ||
|
|
b6e04ab982 | ||
|
|
98fe162878 | ||
|
|
f829d80a49 | ||
|
|
b7e568e299 | ||
|
|
6912846326 | ||
|
|
224868c9a4 | ||
|
|
b1ca90a303 | ||
|
|
dee2475698 | ||
|
|
aeede81474 | ||
|
|
2d91d4cde6 | ||
|
|
7a11c7c165 | ||
|
|
a9f479cfcd | ||
|
|
c1d91b0d6a | ||
|
|
cbfba6acb2 | ||
|
|
f2e2e1f94a | ||
|
|
23668a403a |
@@ -40,6 +40,10 @@ agent {
|
||||
package_manager: {
|
||||
# supported options: pip, conda
|
||||
type: pip,
|
||||
|
||||
# specify pip version to use (examples "<20", "==19.3.1", "", empty string will install the latest version)
|
||||
# pip_version: "<20"
|
||||
|
||||
# virtual environment inheres packages from system
|
||||
system_site_packages: false,
|
||||
# install with --upgrade
|
||||
@@ -83,6 +87,17 @@ agent {
|
||||
# apt cache folder used mapped into docker, for ubuntu package caching
|
||||
docker_apt_cache = ~/.trains/apt-cache
|
||||
|
||||
# optional arguments to pass to docker image
|
||||
# these are local for this agent and will not be updated in the experiment's docker_cmd section
|
||||
# extra_docker_arguments: ["--ipc=host", ]
|
||||
|
||||
# optional shell script to run in docker when started before the experiment is started
|
||||
# extra_docker_shell_script: ["apt-get install -y bindfs", ]
|
||||
|
||||
# set to true in order to force "docker pull" before running an experiment using a docker image.
|
||||
# This makes sure the docker image is updated.
|
||||
docker_force_pull: false
|
||||
|
||||
default_docker: {
|
||||
# default docker image to use when running in docker mode
|
||||
image: "nvidia/cuda"
|
||||
|
||||
59
examples/archive_experiments.py
Normal file
59
examples/archive_experiments.py
Normal file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/python3
|
||||
"""
|
||||
An example script that cleans up failed experiments by moving them to the archive
|
||||
"""
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
|
||||
from trains_agent import APIClient
|
||||
|
||||
parser = argparse.ArgumentParser(description=__doc__)
|
||||
parser.add_argument("--project", "-P", help="Project ID. Only clean up experiments from this project")
|
||||
parser.add_argument("--user", "-U", help="User ID. Only clean up experiments assigned to this user")
|
||||
parser.add_argument(
|
||||
"--status", "-S", default="failed",
|
||||
help="Experiment status. Only clean up experiments with this status (default %(default)s)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--iterations", "-I", type=int,
|
||||
help="Number of iterations. Only clean up experiments with less or equal number of iterations"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--sec-from-start", "-T", type=int,
|
||||
help="Seconds from start time. "
|
||||
"Only clean up experiments if less or equal number of seconds have elapsed since started"
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
client = APIClient()
|
||||
|
||||
tasks = client.tasks.get_all(
|
||||
project=[args.project] if args.project else None,
|
||||
user=[args.user] if args.user else None,
|
||||
status=[args.status] if args.status else None,
|
||||
system_tags=["-archived"]
|
||||
)
|
||||
|
||||
count = 0
|
||||
|
||||
for task in tasks:
|
||||
if args.iterations and (task.last_iteration or 0) > args.iterations:
|
||||
continue
|
||||
if args.sec_from_start:
|
||||
if not task.started:
|
||||
continue
|
||||
if (datetime.utcnow() - task.started.replace(tzinfo=None)).total_seconds() > args.sec_from_start:
|
||||
continue
|
||||
|
||||
try:
|
||||
client.tasks.edit(
|
||||
task=task.id,
|
||||
system_tags=(task.system_tags or []) + ["archived"],
|
||||
force=True
|
||||
)
|
||||
count += 1
|
||||
except Exception as ex:
|
||||
print("Failed editing experiment: {}".format(ex))
|
||||
|
||||
print("Cleaned up {} experiments".format(count))
|
||||
@@ -4,10 +4,10 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Auto-Magically Spin AWS EC2 Instances on Demand \n",
|
||||
"# And Create a Dynamic Cluster Running *Trains-Agent*\n",
|
||||
"# Auto-Magically Spin AWS EC2 Instances On Demand \n",
|
||||
"# and Create a Dynamic Cluster Running *Trains-Agent*\n",
|
||||
"\n",
|
||||
"### Define your budget and execute the notebook, That's is it\n",
|
||||
"### Define your budget and execute the notebook, that's it\n",
|
||||
"### You now have a fully managed cluster on AWS 🎉 🎊 "
|
||||
]
|
||||
},
|
||||
@@ -166,7 +166,11 @@
|
||||
"# echo \"This is the second line\"\n",
|
||||
"# \"\"\"\n",
|
||||
"EXTRA_BASH_SCRIPT = \"\"\"\n",
|
||||
"\"\"\""
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"# Default docker for trains-agent when running in docker mode (requires docker v19.03 and above). \n",
|
||||
"# Leave empty to run trains-agent in non-docker mode.\n",
|
||||
"DEFAULT_DOCKER_IMAGE = \"nvidia/cuda\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -180,7 +184,8 @@
|
||||
"# maximum idle time in minutes, after which the instance will be shutdown\n",
|
||||
"MAX_IDLE_TIME_MIN = 15\n",
|
||||
"# polling interval in minutes\n",
|
||||
"POLLING_INTERVAL_MIN = 2.0"
|
||||
"# make sure to increase in case bash commands were added in EXTRA_BASH_SCRIPT\n",
|
||||
"POLLING_INTERVAL_MIN = 5.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -264,17 +269,21 @@
|
||||
" # user_data script will automatically run when the instance is started. \n",
|
||||
" # It will install the required packages for trains-agent configure it using \n",
|
||||
" # environment variables and run trains-agent on the required queue\n",
|
||||
" user_data = \"\"\"#!/bin/bash \n",
|
||||
" user_data = \"\"\"#!/bin/bash\n",
|
||||
" sudo apt-get update\n",
|
||||
" sudo apt-get install -y python3-dev\n",
|
||||
" sudo apt-get install -y python3-pip\n",
|
||||
" sudo apt-get install -y gcc\n",
|
||||
" sudo apt-get install -y git\n",
|
||||
" sudo python3 -m pip install screen\n",
|
||||
" sudo python3 -m pip install trains-agent\n",
|
||||
" sudo apt-get install -y build-essential\n",
|
||||
" python3 -m pip install -U pip\n",
|
||||
" python3 -m pip install virtualenv\n",
|
||||
" python3 -m virtualenv trains_agent_venv\n",
|
||||
" source trains_agent_venv/bin/activate\n",
|
||||
" python -m pip install trains-agent\n",
|
||||
" echo 'agent.git_user=\\\"{git_user}\\\"' >> /root/trains.conf\n",
|
||||
" echo 'agent.git_pass=\\\"{git_pass}\\\"' >> /root/trains.conf\n",
|
||||
" echo {trains_conf} >> /root/trains.conf\n",
|
||||
" echo \"{trains_conf}\" >> /root/trains.conf\n",
|
||||
" export TRAINS_API_HOST={api_server}\n",
|
||||
" export TRAINS_WEB_HOST={web_server}\n",
|
||||
" export TRAINS_FILES_HOST={files_server}\n",
|
||||
@@ -282,9 +291,11 @@
|
||||
" export TRAINS_WORKER_ID={worker_id}:$DYNAMIC_INSTANCE_ID\n",
|
||||
" export TRAINS_API_ACCESS_KEY='{access_key}'\n",
|
||||
" export TRAINS_API_SECRET_KEY='{secret_key}'\n",
|
||||
" screen\n",
|
||||
" {bash_script}\n",
|
||||
" python3 -m trains_agent --config-file '/root/trains.conf' daemon --queue '{queue}' --docker\"\"\".format(\n",
|
||||
" source ~/.bashrc\n",
|
||||
" python -m trains_agent --config-file '/root/trains.conf' daemon --queue '{queue}' {docker}\n",
|
||||
" shutdown\n",
|
||||
" \"\"\".format(\n",
|
||||
" api_server=TRAINS_SERVER_API_SERVER,\n",
|
||||
" web_server=TRAINS_SERVER_WEB_SERVER,\n",
|
||||
" files_server=TRAINS_SERVER_FILES_SERVER,\n",
|
||||
@@ -295,7 +306,8 @@
|
||||
" git_user=TRAINS_GIT_USER,\n",
|
||||
" git_pass=TRAINS_GIT_PASS,\n",
|
||||
" trains_conf=EXTRA_TRAINS_CONF_ENCODED,\n",
|
||||
" bash_script=EXTRA_BASH_SCRIPT\n",
|
||||
" bash_script=EXTRA_BASH_SCRIPT,\n",
|
||||
" docker=\"--docker '{}'\".format(DEFAULT_DOCKER_IMAGE) if DEFAULT_DOCKER_IMAGE else \"\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" ec2 = boto3.client(\n",
|
||||
@@ -344,6 +356,7 @@
|
||||
" MaxCount=1,\n",
|
||||
" InstanceType=resource_conf[\"instance_type\"],\n",
|
||||
" UserData=user_data,\n",
|
||||
" InstanceInitiatedShutdownBehavior='terminate',\n",
|
||||
" BlockDeviceMappings=[\n",
|
||||
" {\n",
|
||||
" \"DeviceName\": resource_conf[\"ebs_device_name\"],\n",
|
||||
|
||||
@@ -5,6 +5,7 @@ future>=0.16.0
|
||||
humanfriendly>=2.1
|
||||
jsonmodels>=2.2
|
||||
jsonschema>=2.6.0
|
||||
packaging>=16.0
|
||||
pathlib2>=2.3.0
|
||||
psutil>=3.4.2
|
||||
pyhocon>=0.3.38
|
||||
@@ -15,9 +16,8 @@ PyYAML>=3.12
|
||||
requests-file>=1.4.2
|
||||
requests>=2.20.0
|
||||
requirements_parser>=0.2.0
|
||||
semantic_version>=2.6.0
|
||||
six>=1.11.0
|
||||
tqdm>=4.19.5
|
||||
typing>=3.6.4
|
||||
urllib3>=1.21.1
|
||||
virtualenv>=16
|
||||
virtualenv>=16,<20
|
||||
|
||||
@@ -35,7 +35,7 @@ def trains_agentyaml(tmpdir):
|
||||
def _method(template_file):
|
||||
file = tmpdir.join("trains_agent.yaml")
|
||||
with (PROJECT_ROOT / "tests/templates" / template_file).open() as f:
|
||||
code = yaml.load(f)
|
||||
code = yaml.load(f, Loader=yaml.SafeLoader)
|
||||
yield Namespace(code=code, file=file.strpath)
|
||||
file.write(yaml.dump(code))
|
||||
return _method
|
||||
|
||||
@@ -22,9 +22,12 @@
|
||||
# currently supported pip and conda
|
||||
# poetry is used if pip selected and repository contains poetry.lock file
|
||||
package_manager: {
|
||||
# supported options: pip, conda
|
||||
# supported options: pip, conda, poetry
|
||||
type: pip,
|
||||
|
||||
# specify pip version to use (examples "<20", "==19.3.1", "", empty string will install the latest version)
|
||||
pip_version: "<20",
|
||||
|
||||
# virtual environment inheres packages from system
|
||||
system_site_packages: false,
|
||||
|
||||
@@ -68,6 +71,17 @@
|
||||
# apt cache folder used mapped into docker, for ubuntu package caching
|
||||
docker_apt_cache = ~/.trains/apt-cache
|
||||
|
||||
# optional arguments to pass to docker image
|
||||
# these are local for this agent and will not be updated in the experiment's docker_cmd section
|
||||
# extra_docker_arguments: ["--ipc=host", ]
|
||||
|
||||
# optional shell script to run in docker when started before the experiment is started
|
||||
# extra_docker_shell_script: ["apt-get install -y bindfs", ]
|
||||
|
||||
# set to true in order to force "docker pull" before running an experiment using a docker image.
|
||||
# This makes sure the docker image is updated.
|
||||
docker_force_pull: false
|
||||
|
||||
default_docker: {
|
||||
# default docker image to use when running in docker mode
|
||||
image: "nvidia/cuda"
|
||||
|
||||
@@ -4,6 +4,7 @@ from .v2_4 import queues
|
||||
from .v2_4 import tasks
|
||||
from .v2_4 import workers
|
||||
from .v2_4 import events
|
||||
from .v2_4 import models
|
||||
|
||||
__all__ = [
|
||||
'auth',
|
||||
@@ -12,4 +13,5 @@ __all__ = [
|
||||
'tasks',
|
||||
'workers',
|
||||
'events',
|
||||
'models',
|
||||
]
|
||||
|
||||
2850
trains_agent/backend_api/services/v2_4/models.py
Normal file
2850
trains_agent/backend_api/services/v2_4/models.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -59,11 +59,12 @@ from trains_agent.helper.base import (
|
||||
is_conda,
|
||||
named_temporary_file,
|
||||
ExecutionInfo,
|
||||
HOCONEncoder, error, get_python_path)
|
||||
HOCONEncoder, error, get_python_path, is_linux_platform)
|
||||
from trains_agent.helper.console import ensure_text
|
||||
from trains_agent.helper.package.base import PackageManager
|
||||
from trains_agent.helper.package.conda_api import CondaAPI
|
||||
from trains_agent.helper.package.horovod_req import HorovodRequirement
|
||||
from trains_agent.helper.package.external_req import ExternalRequirements
|
||||
from trains_agent.helper.package.pip_api.system import SystemPip
|
||||
from trains_agent.helper.package.pip_api.venv import VirtualenvPip
|
||||
from trains_agent.helper.package.poetry_api import PoetryConfig, PoetryAPI
|
||||
@@ -228,6 +229,8 @@ class TaskStopSignal(object):
|
||||
return self._test()
|
||||
except Exception as ex:
|
||||
self.command.log_traceback(ex)
|
||||
# make sure we break nothing
|
||||
return TaskStopSignal.default
|
||||
|
||||
def _test(self):
|
||||
# type: () -> TaskStopReason
|
||||
@@ -287,6 +290,7 @@ class Worker(ServiceCommandSection):
|
||||
PytorchRequirement,
|
||||
CythonRequirement,
|
||||
HorovodRequirement,
|
||||
ExternalRequirements,
|
||||
)
|
||||
|
||||
# poll queues every _polling_interval seconds
|
||||
@@ -350,10 +354,13 @@ class Worker(ServiceCommandSection):
|
||||
|
||||
self.is_venv_update = self._session.config.agent.venv_update.enabled
|
||||
self.poetry = PoetryConfig(self._session)
|
||||
self.poetry.initialize()
|
||||
self.docker_image_func = None
|
||||
self._docker_image = None
|
||||
self._docker_arguments = None
|
||||
PackageManager.set_pip_version(self._session.config.get("agent.package_manager.pip_version", None))
|
||||
self._extra_docker_arguments = self._session.config.get("agent.extra_docker_arguments", None)
|
||||
self._extra_shell_script = self._session.config.get("agent.extra_docker_shell_script", None)
|
||||
self._docker_force_pull = self._session.config.get("agent.docker_force_pull", False)
|
||||
self._daemon_foreground = None
|
||||
self._standalone_mode = None
|
||||
|
||||
@@ -413,6 +420,7 @@ class Worker(ServiceCommandSection):
|
||||
)
|
||||
)
|
||||
|
||||
docker_image = None
|
||||
if self.docker_image_func:
|
||||
try:
|
||||
response = get_task(self._session, task_id, only_fields=["execution.docker_cmd"])
|
||||
@@ -467,6 +475,19 @@ class Worker(ServiceCommandSection):
|
||||
try:
|
||||
# set WORKER ID
|
||||
os.environ['TRAINS_WORKER_ID'] = self.worker_id
|
||||
|
||||
if self._docker_force_pull and docker_image:
|
||||
full_pull_cmd = ['docker', 'pull', docker_image]
|
||||
pull_cmd = Argv(*full_pull_cmd)
|
||||
status, stop_signal_status = self._log_command_output(
|
||||
task_id=task_id,
|
||||
cmd=pull_cmd,
|
||||
stdout_path=temp_stdout_name,
|
||||
stderr_path=temp_stderr_name,
|
||||
daemon=True,
|
||||
stop_signal=stop_signal,
|
||||
)
|
||||
|
||||
status, stop_signal_status = self._log_command_output(
|
||||
task_id=task_id,
|
||||
cmd=cmd,
|
||||
@@ -736,9 +757,9 @@ class Worker(ServiceCommandSection):
|
||||
|
||||
stdout = open(stdout_path, "wt")
|
||||
stderr = open(stderr_path, "wt") if stderr_path else stdout
|
||||
stdout_line_count, stdout_last_lines = 0, []
|
||||
stderr_line_count, stderr_last_lines = 0, []
|
||||
try:
|
||||
stdout_line_count, stdout_last_lines = 0, []
|
||||
stderr_line_count, stderr_last_lines = 0, []
|
||||
status = None
|
||||
stopping = False
|
||||
_last_machine_update_ts = time()
|
||||
@@ -761,7 +782,7 @@ class Worker(ServiceCommandSection):
|
||||
if daemon:
|
||||
self.send_logs(
|
||||
task_id=task_id,
|
||||
lines=["User aborted: stopping task\n"],
|
||||
lines=["User aborted: stopping task ({})\n".format(str(stop_reason))],
|
||||
level="ERROR",
|
||||
)
|
||||
kill_all_child_processes(process.pid)
|
||||
@@ -786,6 +807,16 @@ class Worker(ServiceCommandSection):
|
||||
# non zero return code
|
||||
stop_reason = 'Exception occurred'
|
||||
status = ex.returncode
|
||||
except KeyboardInterrupt:
|
||||
# so someone else will catch us
|
||||
raise
|
||||
except Exception:
|
||||
# we should not get here, but better safe than sorry
|
||||
stdout_line_count += self.send_logs(task_id, _print_file(stdout_path, stdout_line_count))
|
||||
if stderr_path:
|
||||
stderr_line_count += self.send_logs(task_id, _print_file(stderr_path, stderr_line_count))
|
||||
stop_reason = 'Exception occurred'
|
||||
status = -1
|
||||
|
||||
stdout.close()
|
||||
if stderr_path:
|
||||
@@ -876,8 +907,6 @@ class Worker(ServiceCommandSection):
|
||||
):
|
||||
if not task_id:
|
||||
raise CommandFailedError("Worker build must have valid task id")
|
||||
if not check_if_command_exists("virtualenv"):
|
||||
raise CommandFailedError("Worker must have virtualenv installed")
|
||||
|
||||
self._session.print_configuration()
|
||||
|
||||
@@ -907,8 +936,9 @@ class Worker(ServiceCommandSection):
|
||||
repo_info,
|
||||
requirements_manager=requirements_manager,
|
||||
cached_requirements=requirements,
|
||||
cwd=vcs.location if vcs and vcs.location else directory,
|
||||
)
|
||||
freeze = self.freeze_task_environment()
|
||||
freeze = self.freeze_task_environment(requirements_manager=requirements_manager)
|
||||
script_dir = directory
|
||||
|
||||
# Summary
|
||||
@@ -1006,8 +1036,6 @@ class Worker(ServiceCommandSection):
|
||||
):
|
||||
if not task_id:
|
||||
raise CommandFailedError("Worker execute must have valid task id")
|
||||
if not check_if_command_exists("virtualenv"):
|
||||
raise CommandFailedError("Worker must have virtualenv installed")
|
||||
|
||||
try:
|
||||
current_task = self._session.api_client.tasks.get_by_id(task_id)
|
||||
@@ -1079,11 +1107,13 @@ class Worker(ServiceCommandSection):
|
||||
repo_info,
|
||||
requirements_manager=requirements_manager,
|
||||
cached_requirements=requirements,
|
||||
cwd=vcs.location if vcs and vcs.location else directory,
|
||||
)
|
||||
|
||||
# do not update the task packages if we are using conda,
|
||||
# it will most likely make the task environment unreproducible
|
||||
freeze = self.freeze_task_environment(current_task.id if not self.is_conda else None)
|
||||
freeze = self.freeze_task_environment(current_task.id if not self.is_conda else None,
|
||||
requirements_manager=requirements_manager)
|
||||
script_dir = (directory if isinstance(directory, Path) else Path(directory)).absolute().as_posix()
|
||||
|
||||
# run code
|
||||
@@ -1124,7 +1154,8 @@ class Worker(ServiceCommandSection):
|
||||
self._update_commit_id(task_id, execution, repo_info)
|
||||
|
||||
# Add the script CWD to the python path
|
||||
python_path = get_python_path(script_dir, execution.entry_point, self.package_api)
|
||||
python_path = get_python_path(script_dir, execution.entry_point, self.package_api) \
|
||||
if not self.is_conda else None
|
||||
if python_path:
|
||||
os.environ['PYTHONPATH'] = python_path
|
||||
|
||||
@@ -1132,11 +1163,12 @@ class Worker(ServiceCommandSection):
|
||||
exit_code = -1
|
||||
try:
|
||||
if disable_monitoring:
|
||||
use_execv = is_linux_platform() and not isinstance(self.package_api, (PoetryAPI, CondaAPI))
|
||||
try:
|
||||
sys.stdout.flush()
|
||||
sys.stderr.flush()
|
||||
os.chdir(script_dir)
|
||||
if not is_windows_platform():
|
||||
if use_execv:
|
||||
os.execv(command.argv[0].as_posix(), tuple([command.argv[0].as_posix()])+command.argv[1:])
|
||||
else:
|
||||
exit_code = command.check_call(cwd=script_dir)
|
||||
@@ -1144,10 +1176,10 @@ class Worker(ServiceCommandSection):
|
||||
except subprocess.CalledProcessError as ex:
|
||||
# non zero return code
|
||||
exit_code = ex.returncode
|
||||
if is_windows_platform():
|
||||
if not use_execv:
|
||||
exit(exit_code)
|
||||
except Exception as ex:
|
||||
if is_windows_platform():
|
||||
if not use_execv:
|
||||
exit(-1)
|
||||
raise ex
|
||||
else:
|
||||
@@ -1346,13 +1378,17 @@ class Worker(ServiceCommandSection):
|
||||
status_message=self._task_status_change_message,
|
||||
)
|
||||
|
||||
def freeze_task_environment(self, task_id=None):
|
||||
def freeze_task_environment(self, task_id=None, requirements_manager=None):
|
||||
try:
|
||||
freeze = self.package_api.freeze()
|
||||
except Exception as e:
|
||||
print("Could not freeze installed packages")
|
||||
self.log_traceback(e)
|
||||
return None
|
||||
|
||||
if requirements_manager:
|
||||
freeze = requirements_manager.replace_back(freeze)
|
||||
|
||||
if not task_id:
|
||||
return freeze
|
||||
|
||||
@@ -1377,8 +1413,12 @@ class Worker(ServiceCommandSection):
|
||||
if not repo_info:
|
||||
return None
|
||||
try:
|
||||
if not self.poetry.enabled:
|
||||
return None
|
||||
self.poetry.initialize(cwd=repo_info.root)
|
||||
api = self.poetry.get_api(repo_info.root)
|
||||
if api.enabled:
|
||||
print('Poetry Enabled: Ignoring requested python packages, using repository poetry lock file!')
|
||||
api.install()
|
||||
return api
|
||||
except Exception:
|
||||
@@ -1386,7 +1426,7 @@ class Worker(ServiceCommandSection):
|
||||
return None
|
||||
|
||||
def install_requirements(
|
||||
self, execution, repo_info, requirements_manager, cached_requirements=None
|
||||
self, execution, repo_info, requirements_manager, cached_requirements=None, cwd=None,
|
||||
):
|
||||
# type: (ExecutionInfo, RepoInfo, RequirementsManager, Optional[dict]) -> None
|
||||
"""
|
||||
@@ -1399,6 +1439,8 @@ class Worker(ServiceCommandSection):
|
||||
:param requirements_manager: requirements manager for task
|
||||
:param cached_requirements: cached requirements from previous run
|
||||
"""
|
||||
if self.package_api:
|
||||
self.package_api.cwd = cwd
|
||||
api = self._install_poetry_requirements(repo_info)
|
||||
if api:
|
||||
self.package_api = api
|
||||
@@ -1420,7 +1462,7 @@ class Worker(ServiceCommandSection):
|
||||
except Exception as e:
|
||||
self.log_traceback(e)
|
||||
cached_requirements_failed = True
|
||||
raise ValueError("Could not install task requirements!")
|
||||
raise ValueError("Could not install task requirements!\n{}".format(e))
|
||||
else:
|
||||
self.log("task requirements installation passed")
|
||||
return
|
||||
@@ -1595,14 +1637,20 @@ class Worker(ServiceCommandSection):
|
||||
requested_python_version = requested_python_version or \
|
||||
Text(self._session.config.get("agent.python_binary", None)) or \
|
||||
Text(self._session.config.get("agent.default_python", None))
|
||||
executable_version, executable_version_suffix, executable_name = self.find_python_executable_for_version(
|
||||
requested_python_version
|
||||
)
|
||||
if self.is_conda:
|
||||
executable_version_suffix = \
|
||||
requested_python_version[max(requested_python_version.find('python'), 0):].replace('python', '')
|
||||
executable_name = 'python'
|
||||
else:
|
||||
executable_version, executable_version_suffix, executable_name = self.find_python_executable_for_version(
|
||||
requested_python_version
|
||||
)
|
||||
self._session.config.put("agent.default_python", executable_version)
|
||||
self._session.config.put("agent.python_binary", executable_name)
|
||||
|
||||
venv_dir = Path(venv_dir) if venv_dir else \
|
||||
Path(self._session.config["agent.venvs_dir"], executable_version_suffix)
|
||||
|
||||
self._session.config.put("agent.default_python", executable_version)
|
||||
self._session.config.put("agent.python_binary", executable_name)
|
||||
first_time = not standalone_mode and (
|
||||
is_windows_platform()
|
||||
or self.is_conda
|
||||
@@ -1745,9 +1793,19 @@ class Worker(ServiceCommandSection):
|
||||
# store docker arguments
|
||||
self._docker_image = docker_image
|
||||
self._docker_arguments = docker_arguments
|
||||
|
||||
extra_shell_script_str = ""
|
||||
if self._extra_shell_script:
|
||||
cmds = self._extra_shell_script
|
||||
if not isinstance(cmds, (list, tuple)):
|
||||
cmds = [cmds]
|
||||
extra_shell_script_str = " ; ".join(map(str, cmds)) + " ; "
|
||||
|
||||
docker_cmd = dict(worker_id=self.worker_id,
|
||||
# docker_image=docker_image,
|
||||
# docker_arguments=docker_arguments,
|
||||
extra_docker_arguments=self._extra_docker_arguments,
|
||||
extra_shell_script=extra_shell_script_str,
|
||||
python_version=python_version, conf_file=self.temp_config_path,
|
||||
host_apt_cache=host_apt_cache,
|
||||
host_pip_cache=host_pip_cache,
|
||||
@@ -1766,7 +1824,8 @@ class Worker(ServiceCommandSection):
|
||||
host_ssh_cache,
|
||||
host_cache, mounted_cache,
|
||||
host_pip_dl, mounted_pip_dl,
|
||||
host_vcs_cache, mounted_vcs_cache, standalone_mode=False):
|
||||
host_vcs_cache, mounted_vcs_cache,
|
||||
standalone_mode=False, extra_docker_arguments=None, extra_shell_script=None):
|
||||
docker = 'docker'
|
||||
|
||||
base_cmd = [docker, 'run', '-t']
|
||||
@@ -1783,6 +1842,11 @@ class Worker(ServiceCommandSection):
|
||||
if isinstance(docker_arguments, (list, tuple)) else [docker_arguments]
|
||||
base_cmd += [a for a in docker_arguments if a]
|
||||
|
||||
if extra_docker_arguments:
|
||||
extra_docker_arguments = [extra_docker_arguments] \
|
||||
if isinstance(extra_docker_arguments, six.string_types) else extra_docker_arguments
|
||||
base_cmd += [str(a) for a in extra_docker_arguments if a]
|
||||
|
||||
base_cmd += ['-e', 'TRAINS_WORKER_ID='+worker_id, ]
|
||||
|
||||
if host_ssh_cache:
|
||||
@@ -1807,10 +1871,11 @@ class Worker(ServiceCommandSection):
|
||||
"chown -R root /root/.cache/pip ; " \
|
||||
"apt-get update ; " \
|
||||
"apt-get install -y git libsm6 libxext6 libxrender-dev libglib2.0-0 {python_single_digit}-pip ; " \
|
||||
"{python} -m pip install -U pip ; " \
|
||||
"{python} -m pip install -U \"pip{pip_version}\" ; " \
|
||||
"{python} -m pip install -U trains-agent{specify_version} ; ".format(
|
||||
python_single_digit=python_version.split('.')[0],
|
||||
python=python_version, specify_version=specify_version)
|
||||
python=python_version, pip_version=PackageManager.get_pip_version(),
|
||||
specify_version=specify_version)
|
||||
|
||||
base_cmd += [
|
||||
'-v', conf_file+':/root/trains.conf',
|
||||
@@ -1821,6 +1886,7 @@ class Worker(ServiceCommandSection):
|
||||
'-v', host_vcs_cache+':'+mounted_vcs_cache,
|
||||
'--rm', docker_image, 'bash', '-c',
|
||||
update_scheme +
|
||||
extra_shell_script +
|
||||
"NVIDIA_VISIBLE_DEVICES=all {python} -u -m trains_agent ".format(python=python_version)
|
||||
]
|
||||
|
||||
|
||||
@@ -157,6 +157,10 @@ def is_windows_platform():
|
||||
return any(platform.win32_ver())
|
||||
|
||||
|
||||
def is_linux_platform():
|
||||
return 'linux' in platform.system().lower()
|
||||
|
||||
|
||||
def normalize_path(*paths):
|
||||
"""
|
||||
normalize_path
|
||||
|
||||
@@ -4,7 +4,7 @@ from time import sleep
|
||||
import requests
|
||||
import json
|
||||
from threading import Thread
|
||||
from semantic_version import Version
|
||||
from packaging import version as packaging_version
|
||||
from ..version import __version__
|
||||
|
||||
__check_update_thread = None
|
||||
@@ -30,8 +30,8 @@ def _check_new_version_available():
|
||||
return None
|
||||
trains_answer = update_server_releases.get("trains-agent", {})
|
||||
latest_version = trains_answer.get("version")
|
||||
cur_version = Version(cur_version)
|
||||
latest_version = Version(latest_version)
|
||||
cur_version = packaging_version.parse(cur_version)
|
||||
latest_version = packaging_version.parse(latest_version or '')
|
||||
if cur_version >= latest_version:
|
||||
return None
|
||||
patch_upgrade = latest_version.major == cur_version.major and latest_version.minor == cur_version.minor
|
||||
|
||||
@@ -16,6 +16,8 @@ class PackageManager(object):
|
||||
"""
|
||||
|
||||
_selected_manager = None
|
||||
_cwd = None
|
||||
_pip_version = None
|
||||
|
||||
@abc.abstractproperty
|
||||
def bin(self):
|
||||
@@ -64,7 +66,7 @@ class PackageManager(object):
|
||||
pass
|
||||
|
||||
def upgrade_pip(self):
|
||||
return self._install("pip", "--upgrade")
|
||||
return self._install("pip"+self.get_pip_version(), "--upgrade")
|
||||
|
||||
def get_python_command(self, extra=()):
|
||||
# type: (...) -> Executable
|
||||
@@ -97,11 +99,42 @@ class PackageManager(object):
|
||||
# this is helpful when we want out of context requirement installations
|
||||
PackageManager._selected_manager = self
|
||||
|
||||
@property
|
||||
def cwd(self):
|
||||
return self._cwd
|
||||
|
||||
@cwd.setter
|
||||
def cwd(self, value):
|
||||
self._cwd = value
|
||||
|
||||
@classmethod
|
||||
def out_of_scope_install_package(cls, package_name):
|
||||
def out_of_scope_install_package(cls, package_name, *args):
|
||||
if PackageManager._selected_manager is not None:
|
||||
try:
|
||||
return PackageManager._selected_manager._install(package_name)
|
||||
return PackageManager._selected_manager._install(package_name, *args)
|
||||
except Exception:
|
||||
pass
|
||||
return
|
||||
return
|
||||
|
||||
@classmethod
|
||||
def out_of_scope_freeze(cls):
|
||||
if PackageManager._selected_manager is not None:
|
||||
try:
|
||||
return PackageManager._selected_manager.freeze()
|
||||
except Exception:
|
||||
pass
|
||||
return []
|
||||
|
||||
@classmethod
|
||||
def set_pip_version(cls, version):
|
||||
if not version:
|
||||
return
|
||||
version = version.replace(' ', '')
|
||||
if ('=' in version) or ('~' in version) or ('<' in version) or ('>' in version):
|
||||
cls._pip_version = version
|
||||
else:
|
||||
cls._pip_version = "=="+version
|
||||
|
||||
@classmethod
|
||||
def get_pip_version(cls):
|
||||
return cls._pip_version or ''
|
||||
|
||||
@@ -14,7 +14,7 @@ import yaml
|
||||
from time import time
|
||||
from attr import attrs, attrib, Factory
|
||||
from pathlib2 import Path
|
||||
from semantic_version import Version
|
||||
from packaging import version as packaging_version
|
||||
from requirements import parse
|
||||
from requirements.requirement import Requirement
|
||||
|
||||
@@ -59,7 +59,7 @@ class CondaAPI(PackageManager):
|
||||
A programmatic interface for controlling conda
|
||||
"""
|
||||
|
||||
MINIMUM_VERSION = Version("4.3.30", partial=True)
|
||||
MINIMUM_VERSION = packaging_version.parse("4.3.30")
|
||||
|
||||
def __init__(self, session, path, python, requirements_manager):
|
||||
# type: (Session, PathLike, float, RequirementsManager) -> None
|
||||
@@ -93,7 +93,7 @@ class CondaAPI(PackageManager):
|
||||
)
|
||||
)
|
||||
self.conda_version = self.get_conda_version(output)
|
||||
if Version(self.conda_version, partial=True) < self.MINIMUM_VERSION:
|
||||
if packaging_version.parse(self.conda_version) < self.MINIMUM_VERSION:
|
||||
raise CommandFailedError(
|
||||
"conda version '{}' is smaller than minimum supported conda version '{}'".format(
|
||||
self.conda_version, self.MINIMUM_VERSION
|
||||
@@ -227,20 +227,20 @@ class CondaAPI(PackageManager):
|
||||
self.pip.install_from_file(reqs)
|
||||
|
||||
def freeze(self):
|
||||
# result = yaml.load(
|
||||
# self._run_command((self.conda, "env", "export", "-p", self.path), raw=True)
|
||||
# )
|
||||
# for key in "name", "prefix":
|
||||
# result.pop(key, None)
|
||||
# freeze = {"conda": result}
|
||||
# try:
|
||||
# freeze["pip"] = result["dependencies"][-1]["pip"]
|
||||
# except (TypeError, KeyError):
|
||||
# freeze["pip"] = []
|
||||
# else:
|
||||
# del result["dependencies"][-1]
|
||||
# return freeze
|
||||
return self.pip.freeze()
|
||||
requirements = self.pip.freeze()
|
||||
try:
|
||||
conda_packages = json.loads(self._run_command((self.conda, "list", "--json", "-p", self.path), raw=True))
|
||||
conda_packages_txt = []
|
||||
requirements_pip = [r.split('==')[0].strip().lower() for r in requirements['pip']]
|
||||
for pkg in conda_packages:
|
||||
# skip if this is a pypi package or it is not a python package at all
|
||||
if pkg['channel'] == 'pypi' or pkg['name'].lower() not in requirements_pip:
|
||||
continue
|
||||
conda_packages_txt.append('{0}{1}{2}'.format(pkg['name'], '==', pkg['version']))
|
||||
requirements['conda'] = conda_packages_txt
|
||||
except:
|
||||
pass
|
||||
return requirements
|
||||
|
||||
def load_requirements(self, requirements):
|
||||
# create new environment file
|
||||
@@ -249,6 +249,8 @@ class CondaAPI(PackageManager):
|
||||
reqs = []
|
||||
if isinstance(requirements['pip'], six.string_types):
|
||||
requirements['pip'] = requirements['pip'].split('\n')
|
||||
if isinstance(requirements.get('conda'), six.string_types):
|
||||
requirements['conda'] = requirements['conda'].split('\n')
|
||||
has_torch = False
|
||||
has_matplotlib = False
|
||||
try:
|
||||
@@ -256,35 +258,86 @@ class CondaAPI(PackageManager):
|
||||
except:
|
||||
cuda_version = 0
|
||||
|
||||
for r in requirements['pip']:
|
||||
marker = list(parse(r))
|
||||
if marker:
|
||||
m = MarkerRequirement(marker[0])
|
||||
if m.req.name.lower() == 'matplotlib':
|
||||
has_matplotlib = True
|
||||
elif m.req.name.lower().startswith('torch'):
|
||||
has_torch = True
|
||||
# notice 'conda' entry with empty string is a valid conda requirements list, it means pip only
|
||||
# this should happen if experiment was executed on non-conda machine or old trains client
|
||||
conda_supported_req = requirements['pip'] if requirements.get('conda', None) is None else requirements['conda']
|
||||
conda_supported_req_names = []
|
||||
for r in conda_supported_req:
|
||||
try:
|
||||
marker = list(parse(r))
|
||||
except:
|
||||
marker = None
|
||||
if not marker:
|
||||
continue
|
||||
|
||||
if m.req.name.lower() in ('torch', 'pytorch'):
|
||||
has_torch = True
|
||||
m.req.name = 'pytorch'
|
||||
m = MarkerRequirement(marker[0])
|
||||
conda_supported_req_names.append(m.name.lower())
|
||||
if m.req.name.lower() == 'matplotlib':
|
||||
has_matplotlib = True
|
||||
elif m.req.name.lower().startswith('torch'):
|
||||
has_torch = True
|
||||
|
||||
if m.req.name.lower() in ('tensorflow_gpu', 'tensorflow-gpu', 'tensorflow'):
|
||||
has_torch = True
|
||||
m.req.name = 'tensorflow-gpu' if cuda_version > 0 else 'tensorflow'
|
||||
if m.req.name.lower() in ('torch', 'pytorch'):
|
||||
has_torch = True
|
||||
m.req.name = 'pytorch'
|
||||
|
||||
if m.req.name.lower() in ('tensorflow_gpu', 'tensorflow-gpu', 'tensorflow'):
|
||||
has_torch = True
|
||||
m.req.name = 'tensorflow-gpu' if cuda_version > 0 else 'tensorflow'
|
||||
|
||||
reqs.append(m)
|
||||
|
||||
reqs.append(m)
|
||||
pip_requirements = []
|
||||
# if we have a conda list, the rest should be installed with pip,
|
||||
if requirements.get('conda', None) is not None:
|
||||
for r in requirements['pip']:
|
||||
try:
|
||||
marker = list(parse(r))
|
||||
except:
|
||||
marker = None
|
||||
if not marker:
|
||||
continue
|
||||
|
||||
m = MarkerRequirement(marker[0])
|
||||
m_name = m.name.lower()
|
||||
if m_name in conda_supported_req_names:
|
||||
# this package is in the conda list,
|
||||
# make sure that if we changed version and we match it in conda
|
||||
conda_supported_req_names.remove(m_name)
|
||||
for cr in reqs:
|
||||
if m_name == cr.name.lower():
|
||||
# match versions
|
||||
cr.specs = m.specs
|
||||
break
|
||||
else:
|
||||
# not in conda, it is a pip package
|
||||
pip_requirements.append(m)
|
||||
if m_name == 'matplotlib':
|
||||
has_matplotlib = True
|
||||
|
||||
# remove any leftover conda packages (they were removed from the pip list)
|
||||
if conda_supported_req_names:
|
||||
reqs = [r for r in reqs if r.name.lower() not in conda_supported_req_names]
|
||||
|
||||
# Conda requirements Hacks:
|
||||
if has_matplotlib:
|
||||
reqs.append(MarkerRequirement(Requirement.parse('graphviz')))
|
||||
reqs.append(MarkerRequirement(Requirement.parse('python-graphviz')))
|
||||
reqs.append(MarkerRequirement(Requirement.parse('kiwisolver')))
|
||||
if has_torch and cuda_version == 0:
|
||||
reqs.append(MarkerRequirement(Requirement.parse('cpuonly')))
|
||||
|
||||
# conform conda packages (version/name)
|
||||
for r in reqs:
|
||||
# remove .post from version numbers, it fails ~= version, and change == to ~=
|
||||
if r.specs and r.specs[0]:
|
||||
r.specs = [(r.specs[0][0].replace('==', '~='), r.specs[0][1].split('.post')[0])]
|
||||
# conda always likes "-" not "_"
|
||||
r.req.name = r.req.name.replace('_', '-')
|
||||
|
||||
while reqs:
|
||||
conda_env['dependencies'] = [r.tostr().replace('==', '=') for r in reqs]
|
||||
# notice, we give conda more freedom in version selection, to help it choose best combination
|
||||
conda_env['dependencies'] = [r.tostr() for r in reqs]
|
||||
with self.temp_file("conda_env", yaml.dump(conda_env), suffix=".yml") as name:
|
||||
print('Conda: Trying to install requirements:\n{}'.format(conda_env['dependencies']))
|
||||
result = self._run_command(
|
||||
@@ -297,7 +350,7 @@ class CondaAPI(PackageManager):
|
||||
|
||||
solved = False
|
||||
for bad_r in bad_req:
|
||||
name = bad_r.split('[')[0].split('=')[0]
|
||||
name = bad_r.split('[')[0].split('=')[0].split('~')[0].split('<')[0].split('>')[0]
|
||||
# look for name in requirements
|
||||
for r in reqs:
|
||||
if r.name.lower() == name.lower():
|
||||
@@ -338,7 +391,7 @@ class CondaAPI(PackageManager):
|
||||
if len(empty_lines) >= 2:
|
||||
deps = error_lines[empty_lines[0]+1:empty_lines[1]]
|
||||
try:
|
||||
return yaml.load('\n'.join(deps))
|
||||
return yaml.load('\n'.join(deps), Loader=yaml.SafeLoader)
|
||||
except:
|
||||
return None
|
||||
return None
|
||||
@@ -412,4 +465,4 @@ class PackageNotFoundError(CondaException):
|
||||
as a singleton YAML list.
|
||||
"""
|
||||
|
||||
pkg = attrib(default="", converter=lambda val: yaml.load(val)[0].replace(" ", ""))
|
||||
pkg = attrib(default="", converter=lambda val: yaml.load(val, Loader=yaml.SafeLoader)[0].replace(" ", ""))
|
||||
|
||||
@@ -6,14 +6,14 @@ from .requirements import SimpleSubstitution
|
||||
|
||||
class CythonRequirement(SimpleSubstitution):
|
||||
|
||||
name = "cython"
|
||||
name = ("cython", "numpy", )
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(CythonRequirement, self).__init__(*args, **kwargs)
|
||||
|
||||
def match(self, req):
|
||||
# match both Cython & cython
|
||||
return self.name == req.name.lower()
|
||||
return req.name and req.name.lower() in self.name
|
||||
|
||||
def replace(self, req):
|
||||
"""
|
||||
|
||||
60
trains_agent/helper/package/external_req.py
Normal file
60
trains_agent/helper/package/external_req.py
Normal file
@@ -0,0 +1,60 @@
|
||||
from collections import OrderedDict
|
||||
from typing import Text
|
||||
|
||||
from .base import PackageManager
|
||||
from .requirements import SimpleSubstitution
|
||||
|
||||
|
||||
class ExternalRequirements(SimpleSubstitution):
|
||||
|
||||
name = "external_link"
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(ExternalRequirements, self).__init__(*args, **kwargs)
|
||||
self.post_install_req = []
|
||||
self.post_install_req_lookup = OrderedDict()
|
||||
|
||||
def match(self, req):
|
||||
# match both editable or code or unparsed
|
||||
if not (not req.name or req.req and (req.req.editable or req.req.vcs)):
|
||||
return False
|
||||
if not req.req or not req.req.line or not req.req.line.strip() or req.req.line.strip().startswith('#'):
|
||||
return False
|
||||
return True
|
||||
|
||||
def post_install(self):
|
||||
post_install_req = self.post_install_req
|
||||
self.post_install_req = []
|
||||
for req in post_install_req:
|
||||
try:
|
||||
freeze_base = PackageManager.out_of_scope_freeze() or ''
|
||||
except:
|
||||
freeze_base = ''
|
||||
PackageManager.out_of_scope_install_package(req.tostr(markers=False), "--no-deps")
|
||||
try:
|
||||
freeze_post = PackageManager.out_of_scope_freeze() or ''
|
||||
package_name = list(set(freeze_post['pip']) - set(freeze_base['pip']))
|
||||
if package_name and package_name[0] not in self.post_install_req_lookup:
|
||||
self.post_install_req_lookup[package_name[0]] = req.req.line
|
||||
except:
|
||||
pass
|
||||
PackageManager.out_of_scope_install_package(req.tostr(markers=False), "--ignore-installed")
|
||||
|
||||
def replace(self, req):
|
||||
"""
|
||||
Replace a requirement
|
||||
:raises: ValueError if version is pre-release
|
||||
"""
|
||||
# Store in post req install, and return nothing
|
||||
self.post_install_req.append(req)
|
||||
# mark skip package, we will install it in post install hook
|
||||
return Text('')
|
||||
|
||||
def replace_back(self, list_of_requirements):
|
||||
if 'pip' in list_of_requirements:
|
||||
original_requirements = list_of_requirements['pip']
|
||||
list_of_requirements['pip'] = [r for r in original_requirements
|
||||
if r not in self.post_install_req_lookup]
|
||||
list_of_requirements['pip'] += [self.post_install_req_lookup.get(r, '')
|
||||
for r in self.post_install_req_lookup.keys() if r in original_requirements]
|
||||
return list_of_requirements
|
||||
@@ -14,7 +14,7 @@ class HorovodRequirement(SimpleSubstitution):
|
||||
|
||||
def match(self, req):
|
||||
# match both horovod
|
||||
return self.name == req.name.lower()
|
||||
return req.name and self.name == req.name.lower()
|
||||
|
||||
def post_install(self):
|
||||
if self.post_install_req:
|
||||
|
||||
@@ -29,13 +29,13 @@ class SystemPip(PackageManager):
|
||||
pass
|
||||
|
||||
def install_from_file(self, path):
|
||||
self.run_with_env(('install', '-r', path) + self.install_flags())
|
||||
self.run_with_env(('install', '-r', path) + self.install_flags(), cwd=self.cwd)
|
||||
|
||||
def install_packages(self, *packages):
|
||||
self._install(*(packages + self.install_flags()))
|
||||
|
||||
def _install(self, *args):
|
||||
self.run_with_env(('install',) + args)
|
||||
self.run_with_env(('install',) + args, cwd=self.cwd)
|
||||
|
||||
def uninstall_packages(self, *packages):
|
||||
self.run_with_env(('uninstall', '-y') + packages)
|
||||
@@ -82,7 +82,7 @@ class SystemPip(PackageManager):
|
||||
return (command.get_output if output else command.check_call)(stdin=DEVNULL, **kwargs)
|
||||
|
||||
def _make_command(self, command):
|
||||
return Argv(self.bin, '-m', 'pip', *command)
|
||||
return Argv(self.bin, '-m', 'pip', '--disable-pip-version-check', *command)
|
||||
|
||||
def install_flags(self):
|
||||
if self.indices_args is None:
|
||||
|
||||
@@ -33,7 +33,7 @@ class VirtualenvPip(SystemPip, PackageManager):
|
||||
self.python = python
|
||||
|
||||
def _make_command(self, command):
|
||||
return self.session.command(self.bin, "-m", "pip", *command)
|
||||
return self.session.command(self.bin, "-m", "pip", "--disable-pip-version-check", *command)
|
||||
|
||||
def load_requirements(self, requirements):
|
||||
if isinstance(requirements, dict) and requirements.get("pip"):
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
from copy import deepcopy
|
||||
from functools import wraps
|
||||
|
||||
import attr
|
||||
import sys
|
||||
import os
|
||||
from pathlib2 import Path
|
||||
from trains_agent.helper.process import Argv, DEVNULL
|
||||
from trains_agent.helper.process import Argv, DEVNULL, check_if_command_exists
|
||||
from trains_agent.session import Session, POETRY
|
||||
|
||||
|
||||
@@ -35,10 +38,12 @@ def prop_guard(prop, log_prop=None):
|
||||
|
||||
class PoetryConfig:
|
||||
|
||||
def __init__(self, session):
|
||||
# type: (Session) -> ()
|
||||
def __init__(self, session, interpreter=None):
|
||||
# type: (Session, str) -> ()
|
||||
self.session = session
|
||||
self._log = session.get_logger(__name__)
|
||||
self._python = interpreter or sys.executable
|
||||
self._initialized = False
|
||||
|
||||
@property
|
||||
def log(self):
|
||||
@@ -53,7 +58,20 @@ class PoetryConfig:
|
||||
def run(self, *args, **kwargs):
|
||||
func = kwargs.pop("func", Argv.get_output)
|
||||
kwargs.setdefault("stdin", DEVNULL)
|
||||
argv = Argv("poetry", "-n", *args)
|
||||
kwargs['env'] = deepcopy(os.environ)
|
||||
if 'VIRTUAL_ENV' in kwargs['env'] or 'CONDA_PREFIX' in kwargs['env']:
|
||||
kwargs['env'].pop('VIRTUAL_ENV', None)
|
||||
kwargs['env'].pop('CONDA_PREFIX', None)
|
||||
kwargs['env'].pop('PYTHONPATH', None)
|
||||
if hasattr(sys, "real_prefix") and hasattr(sys, "base_prefix"):
|
||||
path = ':'+kwargs['env']['PATH']
|
||||
path = path.replace(':'+sys.base_prefix, ':'+sys.real_prefix, 1)
|
||||
kwargs['env']['PATH'] = path
|
||||
|
||||
if check_if_command_exists("poetry"):
|
||||
argv = Argv("poetry", *args)
|
||||
else:
|
||||
argv = Argv(self._python, "-m", "poetry", *args)
|
||||
self.log.debug("running: %s", argv)
|
||||
return func(argv, **kwargs)
|
||||
|
||||
@@ -61,10 +79,12 @@ class PoetryConfig:
|
||||
return self.run("config", *args, **kwargs)
|
||||
|
||||
@_guard_enabled
|
||||
def initialize(self):
|
||||
self._config("settings.virtualenvs.in-project", "true")
|
||||
# self._config("repositories.{}".format(self.REPO_NAME), PYTHON_INDEX)
|
||||
# self._config("http-basic.{}".format(self.REPO_NAME), *PYTHON_INDEX_CREDENTIALS)
|
||||
def initialize(self, cwd=None):
|
||||
if not self._initialized:
|
||||
self._initialized = True
|
||||
self._config("--local", "virtualenvs.in-project", "true", cwd=cwd)
|
||||
# self._config("repositories.{}".format(self.REPO_NAME), PYTHON_INDEX)
|
||||
# self._config("http-basic.{}".format(self.REPO_NAME), *PYTHON_INDEX_CREDENTIALS)
|
||||
|
||||
def get_api(self, path):
|
||||
# type: (Path) -> PoetryAPI
|
||||
@@ -81,7 +101,7 @@ class PoetryAPI(object):
|
||||
def install(self):
|
||||
# type: () -> bool
|
||||
if self.enabled:
|
||||
self.config.run("install", cwd=str(self.path), func=Argv.check_call)
|
||||
self.config.run("install", "-n", cwd=str(self.path), func=Argv.check_call)
|
||||
return True
|
||||
return False
|
||||
|
||||
@@ -92,10 +112,15 @@ class PoetryAPI(object):
|
||||
)
|
||||
|
||||
def freeze(self):
|
||||
return {"poetry": self.config.run("show", cwd=str(self.path)).splitlines()}
|
||||
lines = self.config.run("show", cwd=str(self.path)).splitlines()
|
||||
lines = [[p for p in line.split(' ') if p] for line in lines]
|
||||
return {"pip": [parts[0]+'=='+parts[1]+' # '+' '.join(parts[2:]) for parts in lines]}
|
||||
|
||||
def get_python_command(self, extra):
|
||||
return Argv("poetry", "run", "python", *extra)
|
||||
if check_if_command_exists("poetry"):
|
||||
return Argv("poetry", "run", "python", *extra)
|
||||
else:
|
||||
return Argv(self.config._python, "-m", "poetry", "run", "python", *extra)
|
||||
|
||||
def upgrade_pip(self, *args, **kwargs):
|
||||
pass
|
||||
|
||||
@@ -10,7 +10,8 @@ from typing import Text
|
||||
|
||||
import attr
|
||||
import requests
|
||||
from semantic_version import Version, Spec
|
||||
from packaging import version as packaging_version
|
||||
from packaging.specifiers import SpecifierSet
|
||||
|
||||
import six
|
||||
from .requirements import SimpleSubstitution, FatalSpecsResolutionError
|
||||
@@ -155,10 +156,16 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
self.os = os_name or self.get_platform()
|
||||
self.cuda = "cuda{}".format(self.cuda_version).lower()
|
||||
self.python_version_string = str(self.config["agent.default_python"])
|
||||
self.python_semantic_version = Version.coerce(
|
||||
self.python_version_string, partial=True
|
||||
)
|
||||
self.python = "python{}.{}".format(self.python_semantic_version.major, self.python_semantic_version.minor)
|
||||
self.python_major_minor_str = '.'.join(packaging_version.parse(
|
||||
self.python_version_string).base_version.split('.')[:2])
|
||||
if '.' not in self.python_major_minor_str:
|
||||
raise PytorchResolutionError(
|
||||
"invalid python version {!r} defined in configuration file, key 'agent.default_python': "
|
||||
"must have both major and minor parts of the version (for example: '3.7')".format(
|
||||
self.python_version_string
|
||||
)
|
||||
)
|
||||
self.python = "python{}".format(self.python_major_minor_str)
|
||||
|
||||
self.exceptions = [
|
||||
PytorchResolutionError(message)
|
||||
@@ -188,9 +195,7 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
"""
|
||||
Make sure python version has both major and minor versions as required for choosing pytorch wheel
|
||||
"""
|
||||
if self.is_pip and not (
|
||||
self.python_semantic_version.major and self.python_semantic_version.minor
|
||||
):
|
||||
if self.is_pip and not self.python_major_minor_str:
|
||||
raise PytorchResolutionError(
|
||||
"invalid python version {!r} defined in configuration file, key 'agent.default_python': "
|
||||
"must have both major and minor parts of the version (for example: '3.7')".format(
|
||||
@@ -215,8 +220,10 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
links_parser = LinksHTMLParser()
|
||||
links_parser.feed(requests.get(torch_url, timeout=10).text)
|
||||
platform_wheel = "win" if self.get_platform() == "windows" else self.get_platform()
|
||||
py_ver = "{0.major}{0.minor}".format(self.python_semantic_version)
|
||||
py_ver = self.python_major_minor_str.replace('.', '')
|
||||
url = None
|
||||
spec = SpecifierSet(req.format_specs())
|
||||
last_v = None
|
||||
# search for our package
|
||||
for l in links_parser.links:
|
||||
parts = l.split('/')[-1].split('-')
|
||||
@@ -225,21 +232,23 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
if parts[0] != req.name:
|
||||
continue
|
||||
# version (ignore +cpu +cu92 etc. + is %2B in the file link)
|
||||
if parts[1].split('%')[0].split('+')[0] != req.specs[0][1]:
|
||||
# version ignore .postX suffix (treat as regular version)
|
||||
try:
|
||||
v = packaging_version.parse(parts[1].split('%')[0].split('+')[0])
|
||||
except Exception:
|
||||
continue
|
||||
if v not in spec or (last_v and last_v > v):
|
||||
continue
|
||||
if not parts[2].endswith(py_ver):
|
||||
continue
|
||||
if platform_wheel not in parts[4]:
|
||||
continue
|
||||
url = '/'.join(torch_url.split('/')[:-1] + l.split('/'))
|
||||
break
|
||||
last_v = v
|
||||
|
||||
return url
|
||||
|
||||
def get_url_for_platform(self, req):
|
||||
assert self.package_manager == "pip"
|
||||
assert self.os != "mac"
|
||||
|
||||
# check if package is already installed with system packages
|
||||
try:
|
||||
if self.config.get("agent.package_manager.system_site_packages"):
|
||||
@@ -254,7 +263,8 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
pass
|
||||
|
||||
# make sure we have a specific version to retrieve
|
||||
assert req.specs
|
||||
if not req.specs:
|
||||
req.specs = [('>', '0')]
|
||||
|
||||
try:
|
||||
req.specs[0] = (req.specs[0][0], req.specs[0][1].split('+')[0])
|
||||
@@ -282,7 +292,7 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
if not url:
|
||||
url = PytorchWheel(
|
||||
torch_version=fix_version(version),
|
||||
python="{0.major}{0.minor}".format(self.python_semantic_version),
|
||||
python=self.python_major_minor_str.replace('.', ''),
|
||||
os_name=self.os,
|
||||
cuda_version=self.cuda_version,
|
||||
).make_url()
|
||||
@@ -296,13 +306,13 @@ class PytorchRequirement(SimpleSubstitution):
|
||||
@staticmethod
|
||||
def match_version(req, options):
|
||||
versioned_options = sorted(
|
||||
((Version(fix_version(key)), value) for key, value in options.items()),
|
||||
((packaging_version.parse(fix_version(key)), value) for key, value in options.items()),
|
||||
key=itemgetter(0),
|
||||
reverse=True,
|
||||
)
|
||||
req.specs = [(op, fix_version(version)) for op, version in req.specs]
|
||||
if req.specs:
|
||||
specs = Spec(req.format_specs())
|
||||
specs = SpecifierSet(req.format_specs())
|
||||
else:
|
||||
specs = None
|
||||
try:
|
||||
|
||||
@@ -8,9 +8,9 @@ from copy import deepcopy
|
||||
from itertools import chain, starmap
|
||||
from operator import itemgetter
|
||||
from os import path
|
||||
from typing import Text, List, Type, Optional, Tuple
|
||||
from typing import Text, List, Type, Optional, Tuple, Dict
|
||||
|
||||
import semantic_version
|
||||
from packaging import version as packaging_version
|
||||
from pathlib2 import Path
|
||||
from pyhocon import ConfigTree
|
||||
from requirements import parse
|
||||
@@ -48,7 +48,7 @@ class MarkerRequirement(object):
|
||||
|
||||
def tostr(self, markers=True):
|
||||
if not self.uri:
|
||||
parts = [self.name]
|
||||
parts = [self.name or self.line]
|
||||
|
||||
if self.extras:
|
||||
parts.append('[{0}]'.format(','.join(sorted(self.extras))))
|
||||
@@ -177,13 +177,20 @@ class SimpleSubstitution(RequirementSubstitution):
|
||||
|
||||
if req.specs:
|
||||
_, version_number = req.specs[0]
|
||||
assert semantic_version.Version(version_number, partial=True)
|
||||
assert packaging_version.parse(version_number)
|
||||
else:
|
||||
version_number = self.get_pip_version(self.name)
|
||||
|
||||
req.specs = [('==', version_number + self.suffix)]
|
||||
return Text(req)
|
||||
|
||||
def replace_back(self, list_of_requirements): # type: (Dict) -> Dict
|
||||
"""
|
||||
:param list_of_requirements: {'pip': ['a==1.0', ]}
|
||||
:return: {'pip': ['a==1.0', ]}
|
||||
"""
|
||||
return list_of_requirements
|
||||
|
||||
|
||||
@six.add_metaclass(ABCMeta)
|
||||
class CudaSensitiveSubstitution(SimpleSubstitution):
|
||||
@@ -235,15 +242,17 @@ class RequirementsManager(object):
|
||||
return None
|
||||
|
||||
def replace(self, requirements): # type: (Text) -> Text
|
||||
def safe_parse(req_str):
|
||||
try:
|
||||
return next(parse(req_str))
|
||||
except Exception as ex:
|
||||
return Requirement(req_str)
|
||||
|
||||
parsed_requirements = tuple(
|
||||
map(
|
||||
MarkerRequirement,
|
||||
filter(
|
||||
None,
|
||||
parse(requirements)
|
||||
if isinstance(requirements, six.text_type)
|
||||
else (next(parse(line), None) for line in requirements)
|
||||
)
|
||||
[safe_parse(line) for line in (requirements.splitlines()
|
||||
if isinstance(requirements, six.text_type) else requirements)]
|
||||
)
|
||||
)
|
||||
if not parsed_requirements:
|
||||
@@ -258,7 +267,7 @@ class RequirementsManager(object):
|
||||
warning('could not resolve python wheel replacement for {}'.format(req))
|
||||
raise
|
||||
except Exception:
|
||||
warning('could not resolve python wheel replacement for {}, '
|
||||
warning('could not resolve python wheel replacement for \"{}\", '
|
||||
'using original requirements line: {}'.format(req, i))
|
||||
return None
|
||||
|
||||
@@ -280,6 +289,14 @@ class RequirementsManager(object):
|
||||
except Exception as ex:
|
||||
print('RequirementsManager handler {} raised exception: {}'.format(h, ex))
|
||||
|
||||
def replace_back(self, requirements):
|
||||
for h in self.handlers:
|
||||
try:
|
||||
requirements = h.replace_back(requirements)
|
||||
except Exception:
|
||||
pass
|
||||
return requirements
|
||||
|
||||
@staticmethod
|
||||
def get_cuda_version(config): # type: (ConfigTree) -> (Text, Text)
|
||||
# we assume os.environ already updated the config['agent.cuda_version'] & config['agent.cudnn_version']
|
||||
|
||||
@@ -42,7 +42,9 @@ class VcsFactory(object):
|
||||
:param location: (desired) clone location
|
||||
"""
|
||||
url = execution_info.repository
|
||||
is_git = url.endswith(cls.GIT_SUFFIX)
|
||||
# We only support git, hg is deprecated
|
||||
is_git = True
|
||||
# is_git = url.endswith(cls.GIT_SUFFIX)
|
||||
vcs_cls = Git if is_git else Hg
|
||||
revision = (
|
||||
execution_info.version_num
|
||||
|
||||
@@ -1 +1 @@
|
||||
__version__ = '0.13.0'
|
||||
__version__ = '0.13.2rc2'
|
||||
|
||||
Reference in New Issue
Block a user