mirror of
https://github.com/clearml/clearml-session
synced 2025-03-13 15:18:09 +00:00
Merge branch 'allegroai:main' into main
This commit is contained in:
commit
b3ab43de13
42
README.md
42
README.md
@ -205,7 +205,7 @@ In the `clearml` web UI, find the experiment (Task) you wish to debug.
|
|||||||
Click on the ID button next to the Task name, and copy the unique ID.
|
Click on the ID button next to the Task name, and copy the unique ID.
|
||||||
|
|
||||||
``` bash
|
``` bash
|
||||||
clearml-session --debugging <experiment_id_here>
|
clearml-session --debugging-session <experiment_id_here>
|
||||||
```
|
```
|
||||||
|
|
||||||
Click on the JupyterLab/VSCode link, or connect directly to the SSH session
|
Click on the JupyterLab/VSCode link, or connect directly to the SSH session
|
||||||
@ -219,9 +219,12 @@ clearml-session --help
|
|||||||
``` console
|
``` console
|
||||||
clearml-session - CLI for launching JupyterLab / VSCode on a remote machine
|
clearml-session - CLI for launching JupyterLab / VSCode on a remote machine
|
||||||
usage: clearml-session [-h] [--version] [--attach [ATTACH]]
|
usage: clearml-session [-h] [--version] [--attach [ATTACH]]
|
||||||
[--debugging DEBUGGING] [--queue QUEUE]
|
[--debugging-session DEBUGGING_SESSION] [--queue QUEUE]
|
||||||
[--docker DOCKER] [--public-ip [true/false]]
|
[--docker DOCKER] [--docker-args DOCKER_ARGS]
|
||||||
|
[--public-ip [true/false]]
|
||||||
|
[--remote-ssh-port REMOTE_SSH_PORT]
|
||||||
[--vscode-server [true/false]]
|
[--vscode-server [true/false]]
|
||||||
|
[--vscode-version VSCODE_VERSION]
|
||||||
[--jupyter-lab [true/false]]
|
[--jupyter-lab [true/false]]
|
||||||
[--git-credentials [true/false]]
|
[--git-credentials [true/false]]
|
||||||
[--user-folder USER_FOLDER]
|
[--user-folder USER_FOLDER]
|
||||||
@ -231,11 +234,11 @@ usage: clearml-session [-h] [--version] [--attach [ATTACH]]
|
|||||||
[--config-file CONFIG_FILE]
|
[--config-file CONFIG_FILE]
|
||||||
[--remote-gateway [REMOTE_GATEWAY]]
|
[--remote-gateway [REMOTE_GATEWAY]]
|
||||||
[--base-task-id BASE_TASK_ID] [--project PROJECT]
|
[--base-task-id BASE_TASK_ID] [--project PROJECT]
|
||||||
[--disable-keepalive]
|
[--keepalive [true/false]]
|
||||||
[--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]]
|
[--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]]
|
||||||
[--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]]
|
[--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]]
|
||||||
[--skip-docker-network] [--password PASSWORD]
|
[--skip-docker-network] [--password PASSWORD]
|
||||||
[--username USERNAME]
|
[--username USERNAME] [--verbose]
|
||||||
|
|
||||||
clearml-session - CLI for launching JupyterLab / VSCode on a remote machine
|
clearml-session - CLI for launching JupyterLab / VSCode on a remote machine
|
||||||
|
|
||||||
@ -244,23 +247,34 @@ optional arguments:
|
|||||||
--version Display the clearml-session utility version
|
--version Display the clearml-session utility version
|
||||||
--attach [ATTACH] Attach to running interactive session (default:
|
--attach [ATTACH] Attach to running interactive session (default:
|
||||||
previous session)
|
previous session)
|
||||||
--debugging DEBUGGING
|
--debugging-session DEBUGGING_SESSION
|
||||||
Pass existing Task id (experiment), create a copy of
|
Pass existing Task id (experiment), create a copy of
|
||||||
the experiment on a remote machine, and launch
|
the experiment on a remote machine, and launch
|
||||||
jupyter/ssh for interactive access. Example
|
jupyter/ssh for interactive access. Example
|
||||||
--debugging <task_id>
|
--debugging-session <task_id>
|
||||||
--queue QUEUE Select the queue to launch the interactive session on
|
--queue QUEUE Select the queue to launch the interactive session on
|
||||||
(default: previously used queue)
|
(default: previously used queue)
|
||||||
--docker DOCKER Select the docker image to use in the interactive
|
--docker DOCKER Select the docker image to use in the interactive
|
||||||
session on (default: previously used docker image or
|
session on (default: previously used docker image or
|
||||||
`nvidia/cuda:10.1-runtime-ubuntu18.04`)
|
`nvidia/cuda:10.1-runtime-ubuntu18.04`)
|
||||||
|
--docker-args DOCKER_ARGS
|
||||||
|
Add additional arguments for the docker image to use
|
||||||
|
in the interactive session on (default: previously
|
||||||
|
used docker-args)
|
||||||
--public-ip [true/false]
|
--public-ip [true/false]
|
||||||
If True register the public IP of the remote machine.
|
If True register the public IP of the remote machine.
|
||||||
Set if running on the cloud. Default: false (use for
|
Set if running on the cloud. Default: false (use for
|
||||||
local / on-premises)
|
local / on-premises)
|
||||||
|
--remote-ssh-port REMOTE_SSH_PORT
|
||||||
|
Set the remote ssh server port, running on the agent`s
|
||||||
|
machine. (default: 10022)
|
||||||
--vscode-server [true/false]
|
--vscode-server [true/false]
|
||||||
Install vscode server (code-server) on interactive
|
Install vscode server (code-server) on interactive
|
||||||
session (default: true)
|
session (default: true)
|
||||||
|
--vscode-version VSCODE_VERSION
|
||||||
|
Set vscode server (code-server) version, as well as
|
||||||
|
vscode python extension version <vscode:python-ext>
|
||||||
|
(example: "3.7.4:2020.10.332292344")
|
||||||
--jupyter-lab [true/false]
|
--jupyter-lab [true/false]
|
||||||
Install Jupyter-Lab on interactive session (default:
|
Install Jupyter-Lab on interactive session (default:
|
||||||
true)
|
true)
|
||||||
@ -287,17 +301,19 @@ optional arguments:
|
|||||||
Advanced: Change the configuration file used to store
|
Advanced: Change the configuration file used to store
|
||||||
the previous state (default: ~/.clearml_session.json)
|
the previous state (default: ~/.clearml_session.json)
|
||||||
--remote-gateway [REMOTE_GATEWAY]
|
--remote-gateway [REMOTE_GATEWAY]
|
||||||
Advanced: Specify gateway ip/address to be passed to
|
Advanced: Specify gateway ip/address:port to be passed
|
||||||
interactive session (for use with k8s ingestion / ELB)
|
to interactive session (for use with k8s ingestion /
|
||||||
|
ELB)
|
||||||
--base-task-id BASE_TASK_ID
|
--base-task-id BASE_TASK_ID
|
||||||
Advanced: Set the base task ID for the interactive
|
Advanced: Set the base task ID for the interactive
|
||||||
session. (default: previously used Task). Use `none`
|
session. (default: previously used Task). Use `none`
|
||||||
for the default interactive session
|
for the default interactive session
|
||||||
--project PROJECT Advanced: Set the project name for the interactive
|
--project PROJECT Advanced: Set the project name for the interactive
|
||||||
session Task
|
session Task
|
||||||
--disable-keepalive Advanced: If set, disable the transparent proxy always
|
--keepalive [true/false]
|
||||||
keeping the sockets alive. Default: false, use
|
Advanced: If set, enables the transparent proxy always
|
||||||
transparent socket mitigating connection drops.
|
keeping the sockets alive. Default: False, do not use
|
||||||
|
transparent socket for mitigating connection drops.
|
||||||
--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]
|
--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]
|
||||||
Advanced: Excluded queues with this specific tag from
|
Advanced: Excluded queues with this specific tag from
|
||||||
the selection
|
the selection
|
||||||
@ -313,6 +329,8 @@ optional arguments:
|
|||||||
used one)
|
used one)
|
||||||
--username USERNAME Advanced: Select ssh username for the interactive
|
--username USERNAME Advanced: Select ssh username for the interactive
|
||||||
session (default: `root` or previously used one)
|
session (default: `root` or previously used one)
|
||||||
|
--verbose Advanced: If set, print verbose progress information,
|
||||||
|
e.g. the remote machine setup process log
|
||||||
|
|
||||||
Notice! all arguments are stored as new defaults for the next session
|
Notice! all arguments are stored as new defaults for the next session
|
||||||
```
|
```
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
@ -20,6 +21,7 @@ import psutil
|
|||||||
from clearml import Task
|
from clearml import Task
|
||||||
from clearml.backend_api.session.client import APIClient
|
from clearml.backend_api.session.client import APIClient
|
||||||
from clearml.config import config_obj
|
from clearml.config import config_obj
|
||||||
|
from clearml.backend_api import Session
|
||||||
from .tcp_proxy import TcpProxy
|
from .tcp_proxy import TcpProxy
|
||||||
from .single_thread_proxy import SingleThreadProxy
|
from .single_thread_proxy import SingleThreadProxy
|
||||||
|
|
||||||
@ -136,21 +138,41 @@ def create_base_task(state, project_name=None, task_name=None):
|
|||||||
task = Task.create(project_name=project_name or 'DevOps',
|
task = Task.create(project_name=project_name or 'DevOps',
|
||||||
task_name=task_name or 'Interactive Session',
|
task_name=task_name or 'Interactive Session',
|
||||||
task_type=Task.TaskTypes.application)
|
task_type=Task.TaskTypes.application)
|
||||||
task_state = task.export_task()
|
task_script = task.data.script.to_dict()
|
||||||
base_script_file = os.path.abspath(os.path.join(__file__, '..', 'tcp_proxy.py'))
|
base_script_file = os.path.abspath(os.path.join(__file__, '..', 'tcp_proxy.py'))
|
||||||
with open(base_script_file, 'rt') as f:
|
with open(base_script_file, 'rt') as f:
|
||||||
task_state['script']['diff'] = f.read()
|
task_script['diff'] = f.read()
|
||||||
base_script_file = os.path.abspath(os.path.join(__file__, '..', 'interactive_session_task.py'))
|
base_script_file = os.path.abspath(os.path.join(__file__, '..', 'interactive_session_task.py'))
|
||||||
with open(base_script_file, 'rt') as f:
|
with open(base_script_file, 'rt') as f:
|
||||||
task_state['script']['diff'] += '\n\n' + f.read()
|
task_script['diff'] += '\n\n' + f.read()
|
||||||
|
|
||||||
task_state['script']['working_dir'] = '.'
|
task_script['working_dir'] = '.'
|
||||||
task_state['script']['entry_point'] = 'interactive_session.py'
|
task_script['entry_point'] = 'interactive_session.py'
|
||||||
task_state['script']['requirements'] = {'pip': '\n'.join(
|
task_script['requirements'] = {'pip': '\n'.join(
|
||||||
["clearml"] + (["jupyter", "jupyterlab", "jupyterlab_git"] if state.get('jupyter_lab') else []) +
|
["clearml>=1.1.5"] + (["jupyter", "jupyterlab", "jupyterlab_git"] if state.get('jupyter_lab') else []) +
|
||||||
(['pylint'] if state.get('vscode_server') else []))}
|
(['pylint'] if state.get('vscode_server') else []))}
|
||||||
task.update_task(task_state)
|
|
||||||
section, _, _ = _get_config_section_name()
|
section, _, _ = _get_config_section_name()
|
||||||
|
|
||||||
|
if Session.check_min_api_version('2.13'):
|
||||||
|
_runtime_prop = dict(task._get_runtime_properties())
|
||||||
|
_runtime_prop.update({
|
||||||
|
"_user_key": '',
|
||||||
|
"_user_secret": '',
|
||||||
|
"_jupyter_token": '',
|
||||||
|
"_ssh_password": "training",
|
||||||
|
})
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
task._set_runtime_properties(_runtime_prop)
|
||||||
|
task.set_parameters({
|
||||||
|
"{}/user_base_directory".format(section): "~/",
|
||||||
|
"{}/ssh_server".format(section): True,
|
||||||
|
"{}/default_docker".format(section): "nvidia/cuda",
|
||||||
|
"properties/external_address": '',
|
||||||
|
"properties/internal_ssh_port": '',
|
||||||
|
"properties/jupyter_port": '',
|
||||||
|
})
|
||||||
|
else:
|
||||||
task.set_parameters({
|
task.set_parameters({
|
||||||
"{}/user_base_directory".format(section): "~/",
|
"{}/user_base_directory".format(section): "~/",
|
||||||
"{}/ssh_server".format(section): True,
|
"{}/ssh_server".format(section): True,
|
||||||
@ -163,8 +185,12 @@ def create_base_task(state, project_name=None, task_name=None):
|
|||||||
"properties/jupyter_token": '',
|
"properties/jupyter_token": '',
|
||||||
"properties/jupyter_port": '',
|
"properties/jupyter_port": '',
|
||||||
})
|
})
|
||||||
|
|
||||||
task.set_system_tags([system_tag])
|
task.set_system_tags([system_tag])
|
||||||
task.reset(force=True)
|
|
||||||
|
# only update the data at the end, so reload requests are smaller
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
task._edit(script=task_script)
|
||||||
return task
|
return task
|
||||||
|
|
||||||
|
|
||||||
@ -197,6 +223,26 @@ def create_debugging_task(state, debug_task_id):
|
|||||||
(['pylint'] if state.get('vscode_server') else [])
|
(['pylint'] if state.get('vscode_server') else [])
|
||||||
task.update_task(task_state)
|
task.update_task(task_state)
|
||||||
section, _, _ = _get_config_section_name()
|
section, _, _ = _get_config_section_name()
|
||||||
|
|
||||||
|
if Session.check_min_api_version('2.13'):
|
||||||
|
_runtime_prop = dict(task._get_runtime_properties())
|
||||||
|
_runtime_prop.update({
|
||||||
|
"_user_key": '',
|
||||||
|
"_user_secret": '',
|
||||||
|
"_jupyter_token": '',
|
||||||
|
"_ssh_password": "training",
|
||||||
|
})
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
task._set_runtime_properties(_runtime_prop)
|
||||||
|
task.set_parameters({
|
||||||
|
"{}/user_base_directory".format(section): "~/",
|
||||||
|
"{}/ssh_server".format(section): True,
|
||||||
|
"{}/default_docker".format(section): "nvidia/cuda",
|
||||||
|
"properties/external_address": '',
|
||||||
|
"properties/internal_ssh_port": '',
|
||||||
|
"properties/jupyter_port": '',
|
||||||
|
})
|
||||||
|
else:
|
||||||
task.set_parameters({
|
task.set_parameters({
|
||||||
"{}/user_base_directory".format(section): "~/",
|
"{}/user_base_directory".format(section): "~/",
|
||||||
"{}/ssh_server".format(section): True,
|
"{}/ssh_server".format(section): True,
|
||||||
@ -214,7 +260,7 @@ def create_debugging_task(state, debug_task_id):
|
|||||||
return task
|
return task
|
||||||
|
|
||||||
|
|
||||||
def delete_old_tasks(client, base_task_id):
|
def delete_old_tasks(state, client, base_task_id):
|
||||||
print('Removing stale interactive sessions')
|
print('Removing stale interactive sessions')
|
||||||
current_user_id = _get_user_id(client)
|
current_user_id = _get_user_id(client)
|
||||||
previous_tasks = client.tasks.get_all(**{
|
previous_tasks = client.tasks.get_all(**{
|
||||||
@ -222,9 +268,13 @@ def delete_old_tasks(client, base_task_id):
|
|||||||
'parent': base_task_id or None,
|
'parent': base_task_id or None,
|
||||||
'system_tags': None if base_task_id else [system_tag],
|
'system_tags': None if base_task_id else [system_tag],
|
||||||
'page_size': 100, 'page': 0,
|
'page_size': 100, 'page': 0,
|
||||||
'user': [current_user_id], 'only_fields': ['id']
|
'user': [current_user_id],
|
||||||
|
'only_fields': ['id']
|
||||||
})
|
})
|
||||||
for t in previous_tasks:
|
|
||||||
|
for i, t in enumerate(previous_tasks):
|
||||||
|
if state.get('verbose'):
|
||||||
|
print('Removing {}/{} stale sessions'.format(i+1, len(previous_tasks)))
|
||||||
try:
|
try:
|
||||||
client.tasks.delete(task=t.id, force=True)
|
client.tasks.delete(task=t.id, force=True)
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
@ -265,6 +315,17 @@ def _get_user_id(client):
|
|||||||
return current_user_id
|
return current_user_id
|
||||||
|
|
||||||
|
|
||||||
|
def _b64_encode_file(file):
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
import gzip
|
||||||
|
with open(file, 'rt') as f:
|
||||||
|
git_credentials = gzip.compress(f.read().encode('utf8'))
|
||||||
|
return base64.encodebytes(git_credentials).decode('ascii')
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def get_project_id(state):
|
def get_project_id(state):
|
||||||
project_id = None
|
project_id = None
|
||||||
project_name = state.get('project') or None
|
project_name = state.get('project') or None
|
||||||
@ -383,8 +444,8 @@ def load_state(state_file):
|
|||||||
state = json.load(f)
|
state = json.load(f)
|
||||||
except Exception:
|
except Exception:
|
||||||
state = {}
|
state = {}
|
||||||
# never reload --debug state
|
# never reload --verbose state
|
||||||
state.pop('debug', None)
|
state.pop('verbose', None)
|
||||||
return state
|
return state
|
||||||
|
|
||||||
|
|
||||||
@ -402,13 +463,31 @@ def clone_task(state, project_id):
|
|||||||
task = create_base_task(state, project_name=state.get('project'))
|
task = create_base_task(state, project_name=state.get('project'))
|
||||||
new_task = True
|
new_task = True
|
||||||
|
|
||||||
|
print('Configuring new session')
|
||||||
|
runtime_prop_support = Session.check_min_api_version("2.13")
|
||||||
|
if runtime_prop_support:
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
runtime_properties = dict(task._get_runtime_properties() or {})
|
||||||
|
runtime_properties['_jupyter_token'] = ''
|
||||||
|
runtime_properties['_ssh_password'] = str(state['password'])
|
||||||
|
runtime_properties['_user_key'] = str(config_obj.get("api.credentials.access_key"))
|
||||||
|
runtime_properties['_user_secret'] = (config_obj.get("api.credentials.secret_key"))
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
task._set_runtime_properties(runtime_properties)
|
||||||
|
|
||||||
task_params = task.get_parameters(backwards_compatibility=False)
|
task_params = task.get_parameters(backwards_compatibility=False)
|
||||||
if 'General/ssh_server' in task_params:
|
if 'General/ssh_server' in task_params:
|
||||||
section = 'General'
|
section = 'General'
|
||||||
init_section = 'init_script'
|
init_section = 'init_script'
|
||||||
else:
|
else:
|
||||||
section, _, init_section = _get_config_section_name()
|
section, _, init_section = _get_config_section_name()
|
||||||
|
|
||||||
|
if not runtime_prop_support:
|
||||||
task_params['properties/jupyter_token'] = ''
|
task_params['properties/jupyter_token'] = ''
|
||||||
|
task_params['{}/ssh_password'.format(section)] = state['password']
|
||||||
|
task_params['{}/user_key'.format(section)] = config_obj.get("api.credentials.access_key")
|
||||||
|
task_params['{}/user_secret'.format(section)] = config_obj.get("api.credentials.secret_key")
|
||||||
|
|
||||||
task_params['properties/jupyter_port'] = ''
|
task_params['properties/jupyter_port'] = ''
|
||||||
if state.get('remote_gateway') is not None:
|
if state.get('remote_gateway') is not None:
|
||||||
remote_gateway_parts = str(state.get('remote_gateway')).split(':')
|
remote_gateway_parts = str(state.get('remote_gateway')).split(':')
|
||||||
@ -416,9 +495,6 @@ def clone_task(state, project_id):
|
|||||||
if len(remote_gateway_parts) > 1:
|
if len(remote_gateway_parts) > 1:
|
||||||
task_params['properties/external_ssh_port'] = remote_gateway_parts[1]
|
task_params['properties/external_ssh_port'] = remote_gateway_parts[1]
|
||||||
task_params['{}/ssh_server'.format(section)] = str(True)
|
task_params['{}/ssh_server'.format(section)] = str(True)
|
||||||
task_params['{}/ssh_password'.format(section)] = state['password']
|
|
||||||
task_params['{}/user_key'.format(section)] = config_obj.get("api.credentials.access_key")
|
|
||||||
task_params['{}/user_secret'.format(section)] = config_obj.get("api.credentials.secret_key")
|
|
||||||
task_params["{}/jupyterlab".format(section)] = bool(state.get('jupyter_lab'))
|
task_params["{}/jupyterlab".format(section)] = bool(state.get('jupyter_lab'))
|
||||||
task_params["{}/vscode_server".format(section)] = bool(state.get('vscode_server'))
|
task_params["{}/vscode_server".format(section)] = bool(state.get('vscode_server'))
|
||||||
task_params["{}/public_ip".format(section)] = bool(state.get('public_ip'))
|
task_params["{}/public_ip".format(section)] = bool(state.get('public_ip'))
|
||||||
@ -443,11 +519,28 @@ def clone_task(state, project_id):
|
|||||||
# store the .git-credentials
|
# store the .git-credentials
|
||||||
if state.get('git_credentials'):
|
if state.get('git_credentials'):
|
||||||
git_cred_file = os.path.join(os.path.expanduser('~'), '.git-credentials')
|
git_cred_file = os.path.join(os.path.expanduser('~'), '.git-credentials')
|
||||||
if os.path.isfile(git_cred_file):
|
git_conf_file = os.path.join(os.path.expanduser('~'), '.gitconfig')
|
||||||
|
if not os.path.isfile(git_cred_file):
|
||||||
|
git_cred_file = None
|
||||||
|
if not os.path.isfile(git_conf_file):
|
||||||
|
git_conf_file = None
|
||||||
|
|
||||||
|
if runtime_prop_support:
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
runtime_properties = dict(task._get_runtime_properties() or {})
|
||||||
|
if git_cred_file:
|
||||||
|
runtime_properties['_git_credentials'] = _b64_encode_file(git_cred_file)
|
||||||
|
if git_conf_file:
|
||||||
|
runtime_properties['_git_config'] = _b64_encode_file(git_conf_file)
|
||||||
|
# store back
|
||||||
|
if git_cred_file or git_conf_file:
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
task._set_runtime_properties(runtime_properties)
|
||||||
|
else:
|
||||||
|
if git_cred_file:
|
||||||
task.connect_configuration(
|
task.connect_configuration(
|
||||||
configuration=git_cred_file, name='git_credentials', description='git credentials')
|
configuration=git_cred_file, name='git_credentials', description='git credentials')
|
||||||
git_conf_file = os.path.join(os.path.expanduser('~'), '.gitconfig')
|
if git_conf_file:
|
||||||
if os.path.isfile(git_conf_file):
|
|
||||||
task.connect_configuration(
|
task.connect_configuration(
|
||||||
configuration=git_conf_file, name='git_config', description='git config')
|
configuration=git_conf_file, name='git_config', description='git config')
|
||||||
|
|
||||||
@ -476,21 +569,28 @@ def wait_for_machine(state, task):
|
|||||||
# wait until task is running
|
# wait until task is running
|
||||||
print('Waiting for remote machine allocation [id={}]'.format(task.id))
|
print('Waiting for remote machine allocation [id={}]'.format(task.id))
|
||||||
last_status = None
|
last_status = None
|
||||||
while last_status != 'in_progress' and last_status in (None, 'created', 'queued', 'unknown',):
|
last_message = None
|
||||||
|
stopped_counter = 0
|
||||||
|
while last_status != 'in_progress' and last_status in (None, 'created', 'queued', 'unknown', 'stopped'):
|
||||||
print('.', end='', flush=True)
|
print('.', end='', flush=True)
|
||||||
if last_status is not None:
|
if last_status is not None:
|
||||||
sleep(2.)
|
sleep(2.)
|
||||||
status = task.get_status()
|
stopped_counter = (stopped_counter+1) if last_status == 'stopped' else 0
|
||||||
if last_status != status:
|
if stopped_counter > 5:
|
||||||
|
break
|
||||||
# noinspection PyProtectedMember
|
# noinspection PyProtectedMember
|
||||||
last_status = task._get_status()[1]
|
status, message = task._get_status()
|
||||||
print('Status [{}]{}'.format(status, ' - {}'.format(last_status) if last_status else ''))
|
status = str(status)
|
||||||
|
if last_status != status or last_message != message:
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
print('Status [{}]{} {}'.format(status, ' - {}'.format(last_status) if last_status else '', message))
|
||||||
last_status = status
|
last_status = status
|
||||||
|
last_message = message
|
||||||
|
|
||||||
print('Remote machine allocated')
|
print('Remote machine allocated')
|
||||||
print('Setting remote environment [Task id={}]'.format(task.id))
|
print('Setting remote environment [Task id={}]'.format(task.id))
|
||||||
print('Setup process details: {}'.format(task.get_output_log_web_page()))
|
print('Setup process details: {}'.format(task.get_output_log_web_page()))
|
||||||
print('Waiting for environment setup to complete [usually about 20-30 seconds]')
|
print('Waiting for environment setup to complete [usually about 20-30 seconds, see last log line/s below]')
|
||||||
# monitor progress, until we get the new jupyter, then we know it is working
|
# monitor progress, until we get the new jupyter, then we know it is working
|
||||||
task.reload()
|
task.reload()
|
||||||
|
|
||||||
@ -513,24 +613,38 @@ def wait_for_machine(state, task):
|
|||||||
last_lines = []
|
last_lines = []
|
||||||
period_counter = 0
|
period_counter = 0
|
||||||
while any(bool(not task.get_parameter(p)) for p in wait_properties) and task.get_status() == 'in_progress':
|
while any(bool(not task.get_parameter(p)) for p in wait_properties) and task.get_status() == 'in_progress':
|
||||||
lines = task.get_reported_console_output(10) if state.get('debug') else []
|
lines = task.get_reported_console_output(10 if state.get('verbose') else 1)
|
||||||
if last_lines != lines:
|
if last_lines != lines:
|
||||||
# new line if we had '.' counter in the previous run
|
# new line if we had '.' counter in the previous run
|
||||||
if period_counter:
|
if period_counter:
|
||||||
|
if state.get('verbose'):
|
||||||
print('')
|
print('')
|
||||||
period_counter = 0
|
period_counter = 0
|
||||||
try:
|
try:
|
||||||
index = next(i for i, line in enumerate(lines) if last_lines and line == last_lines[-1])
|
index = next(i for i, line in enumerate(lines) if last_lines and line == last_lines[-1])
|
||||||
print('> ' + ''.join(lines[index+1:]).rstrip().replace('\n', '\n> '))
|
print_line = '> ' + ''.join(lines[index+1:]).rstrip().replace('\n', '\n> ')
|
||||||
except StopIteration:
|
except StopIteration:
|
||||||
print('> ' + ''.join(lines).rstrip().replace('\n', '\n> '))
|
print_line = '> ' + ''.join(lines).rstrip().replace('\n', '\n> ')
|
||||||
|
|
||||||
|
if state.get('verbose'):
|
||||||
|
print(print_line)
|
||||||
|
else:
|
||||||
|
print_line = [l for l in print_line.split('\n') if l.rstrip()]
|
||||||
|
if print_line:
|
||||||
|
print('\r' + print_line[-1], end='', flush=True)
|
||||||
last_lines = lines
|
last_lines = lines
|
||||||
else:
|
else:
|
||||||
print('.', end='', flush=True)
|
|
||||||
period_counter += 1
|
period_counter += 1
|
||||||
|
print(('' if state.get('verbose') else '\r') + '.'*period_counter, end='', flush=True)
|
||||||
|
|
||||||
sleep(3.)
|
sleep(3.)
|
||||||
task.reload()
|
task.reload()
|
||||||
|
|
||||||
|
# clear the line
|
||||||
|
if not state.get('verbose'):
|
||||||
|
print('\r ', end='', flush=True)
|
||||||
|
print('\n')
|
||||||
|
|
||||||
if task.get_status() != 'in_progress':
|
if task.get_status() != 'in_progress':
|
||||||
raise ValueError("Remote setup failed (status={}) see details: {}".format(
|
raise ValueError("Remote setup failed (status={}) see details: {}".format(
|
||||||
task.get_status(), task.get_output_log_web_page()))
|
task.get_status(), task.get_output_log_web_page()))
|
||||||
@ -609,7 +723,7 @@ def monitor_ssh_tunnel(state, task):
|
|||||||
vscode_port = None
|
vscode_port = None
|
||||||
|
|
||||||
connect_state = {'reconnect': False}
|
connect_state = {'reconnect': False}
|
||||||
if not state.get('disable_keepalive'):
|
if state.get('keepalive'):
|
||||||
if state.get('jupyter_lab'):
|
if state.get('jupyter_lab'):
|
||||||
SingleThreadProxy(local_jupyter_port, local_jupyter_port_)
|
SingleThreadProxy(local_jupyter_port, local_jupyter_port_)
|
||||||
if state.get('vscode_server'):
|
if state.get('vscode_server'):
|
||||||
@ -628,18 +742,25 @@ def monitor_ssh_tunnel(state, task):
|
|||||||
if not all([ssh_port, jupyter_token, jupyter_port, internal_ssh_port, ssh_password, remote_address]):
|
if not all([ssh_port, jupyter_token, jupyter_port, internal_ssh_port, ssh_password, remote_address]):
|
||||||
task.reload()
|
task.reload()
|
||||||
task_parameters = task.get_parameters()
|
task_parameters = task.get_parameters()
|
||||||
|
if Session.check_min_api_version("2.13"):
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
runtime_prop = task._get_runtime_properties()
|
||||||
|
ssh_password = runtime_prop.get('_ssh_password') or state.get('password', '')
|
||||||
|
jupyter_token = runtime_prop.get('_jupyter_token')
|
||||||
|
else:
|
||||||
section = 'General' if 'General/ssh_server' in task_parameters else default_section
|
section = 'General' if 'General/ssh_server' in task_parameters else default_section
|
||||||
|
ssh_password = task_parameters.get('{}/ssh_password'.format(section)) or state.get('password', '')
|
||||||
|
jupyter_token = task_parameters.get('properties/jupyter_token')
|
||||||
|
|
||||||
remote_address = \
|
remote_address = \
|
||||||
task_parameters.get('properties/k8s-gateway-address') or \
|
task_parameters.get('properties/k8s-gateway-address') or \
|
||||||
task_parameters.get('properties/external_address')
|
task_parameters.get('properties/external_address')
|
||||||
ssh_password = task_parameters.get('{}/ssh_password'.format(section)) or state.get('password', '')
|
|
||||||
internal_ssh_port = task_parameters.get('properties/internal_ssh_port')
|
internal_ssh_port = task_parameters.get('properties/internal_ssh_port')
|
||||||
jupyter_port = task_parameters.get('properties/jupyter_port')
|
jupyter_port = task_parameters.get('properties/jupyter_port')
|
||||||
jupyter_token = task_parameters.get('properties/jupyter_token')
|
|
||||||
ssh_port = \
|
ssh_port = \
|
||||||
task_parameters.get('properties/k8s-pod-port') or \
|
task_parameters.get('properties/k8s-pod-port') or \
|
||||||
task_parameters.get('properties/external_ssh_port') or internal_ssh_port
|
task_parameters.get('properties/external_ssh_port') or internal_ssh_port
|
||||||
if not state.get('disable_keepalive'):
|
if state.get('keepalive'):
|
||||||
internal_ssh_port = task_parameters.get('properties/internal_stable_ssh_port') or internal_ssh_port
|
internal_ssh_port = task_parameters.get('properties/internal_stable_ssh_port') or internal_ssh_port
|
||||||
local_remote_pair_list = [(local_ssh_port_, internal_ssh_port)]
|
local_remote_pair_list = [(local_ssh_port_, internal_ssh_port)]
|
||||||
if state.get('jupyter_lab'):
|
if state.get('jupyter_lab'):
|
||||||
@ -671,7 +792,7 @@ def monitor_ssh_tunnel(state, task):
|
|||||||
state.get('username') or 'root',
|
state.get('username') or 'root',
|
||||||
remote_address, ssh_port, ssh_password,
|
remote_address, ssh_port, ssh_password,
|
||||||
local_remote_pair_list=local_remote_pair_list,
|
local_remote_pair_list=local_remote_pair_list,
|
||||||
debug=state.get('debug', False),
|
debug=state.get('verbose', False),
|
||||||
)
|
)
|
||||||
|
|
||||||
if ssh_process and ssh_process.isalive():
|
if ssh_process and ssh_process.isalive():
|
||||||
@ -684,9 +805,13 @@ def monitor_ssh_tunnel(state, task):
|
|||||||
msg += \
|
msg += \
|
||||||
'\nJupyter Lab URL: http://localhost:{local_jupyter_port}/?token={jupyter_token}'.format(
|
'\nJupyter Lab URL: http://localhost:{local_jupyter_port}/?token={jupyter_token}'.format(
|
||||||
local_jupyter_port=local_jupyter_port, jupyter_token=jupyter_token.rstrip())
|
local_jupyter_port=local_jupyter_port, jupyter_token=jupyter_token.rstrip())
|
||||||
|
if state.get('user_folder'):
|
||||||
|
msg += "&file-browser-path={}".format(state.get('user_folder'))
|
||||||
if vscode_port:
|
if vscode_port:
|
||||||
msg += '\nVSCode server available at http://localhost:{local_vscode_port}/'.format(
|
msg += '\nVSCode server available at http://localhost:{local_vscode_port}/'.format(
|
||||||
local_vscode_port=local_vscode_port)
|
local_vscode_port=local_vscode_port)
|
||||||
|
if state.get('user_folder'):
|
||||||
|
msg += "?folder={}".format(state.get('user_folder'))
|
||||||
print(msg)
|
print(msg)
|
||||||
|
|
||||||
print('\nConnection is up and running\n'
|
print('\nConnection is up and running\n'
|
||||||
@ -810,9 +935,10 @@ def setup_parser(parser):
|
|||||||
'(default: previously used Task). Use `none` for the default interactive session')
|
'(default: previously used Task). Use `none` for the default interactive session')
|
||||||
parser.add_argument('--project', type=str, default=None,
|
parser.add_argument('--project', type=str, default=None,
|
||||||
help='Advanced: Set the project name for the interactive session Task')
|
help='Advanced: Set the project name for the interactive session Task')
|
||||||
parser.add_argument('--disable-keepalive', action='store_true', default=None,
|
parser.add_argument('--keepalive', default=False, nargs='?', const='true', metavar='true/false',
|
||||||
help='Advanced: If set, disable the transparent proxy always keeping the sockets alive. '
|
type=lambda x: (str(x).strip().lower() in ('true', 'yes')),
|
||||||
'Default: false, use transparent socket mitigating connection drops.')
|
help='Advanced: If set, enables the transparent proxy always keeping the sockets alive. '
|
||||||
|
'Default: False, do not use transparent socket for mitigating connection drops.')
|
||||||
parser.add_argument('--queue-excluded-tag', default=None, nargs='*',
|
parser.add_argument('--queue-excluded-tag', default=None, nargs='*',
|
||||||
help='Advanced: Excluded queues with this specific tag from the selection')
|
help='Advanced: Excluded queues with this specific tag from the selection')
|
||||||
parser.add_argument('--queue-include-tag', default=None, nargs='*',
|
parser.add_argument('--queue-include-tag', default=None, nargs='*',
|
||||||
@ -826,8 +952,9 @@ def setup_parser(parser):
|
|||||||
parser.add_argument('--username', type=str, default=None,
|
parser.add_argument('--username', type=str, default=None,
|
||||||
help='Advanced: Select ssh username for the interactive session '
|
help='Advanced: Select ssh username for the interactive session '
|
||||||
'(default: `root` or previously used one)')
|
'(default: `root` or previously used one)')
|
||||||
parser.add_argument('--debug', action='store_true', default=None,
|
parser.add_argument('--verbose', action='store_true', default=None,
|
||||||
help='Advanced: If set, print debugging information')
|
help='Advanced: If set, print verbose progress information, '
|
||||||
|
'e.g. the remote machine setup process log')
|
||||||
|
|
||||||
|
|
||||||
def get_version():
|
def get_version():
|
||||||
@ -862,8 +989,8 @@ def cli():
|
|||||||
state_file = os.path.abspath(os.path.expandvars(os.path.expanduser(args.config_file)))
|
state_file = os.path.abspath(os.path.expandvars(os.path.expanduser(args.config_file)))
|
||||||
state = load_state(state_file)
|
state = load_state(state_file)
|
||||||
|
|
||||||
if args.debug:
|
if args.verbose:
|
||||||
state['debug'] = args.debug
|
state['verbose'] = args.verbose
|
||||||
|
|
||||||
client = APIClient()
|
client = APIClient()
|
||||||
|
|
||||||
@ -894,7 +1021,7 @@ def cli():
|
|||||||
project_id = get_project_id(state)
|
project_id = get_project_id(state)
|
||||||
|
|
||||||
# remove old Tasks created by us.
|
# remove old Tasks created by us.
|
||||||
delete_old_tasks(client, state.get('base_task_id'))
|
delete_old_tasks(state, client, state.get('base_task_id'))
|
||||||
|
|
||||||
# Clone the Task and adjust parameters
|
# Clone the Task and adjust parameters
|
||||||
task = clone_task(state, project_id)
|
task = clone_task(state, project_id)
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
import base64
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import socket
|
import socket
|
||||||
@ -13,6 +14,7 @@ import psutil
|
|||||||
from pathlib2 import Path
|
from pathlib2 import Path
|
||||||
|
|
||||||
from clearml import Task, StorageManager
|
from clearml import Task, StorageManager
|
||||||
|
from clearml.backend_api import Session
|
||||||
|
|
||||||
|
|
||||||
# noinspection SpellCheckingInspection
|
# noinspection SpellCheckingInspection
|
||||||
@ -97,7 +99,25 @@ def init_task(param, a_default_ssh_fingerprint):
|
|||||||
project_name="DevOps", task_name="Allocate Jupyter Notebook Instance", task_type=Task.TaskTypes.service)
|
project_name="DevOps", task_name="Allocate Jupyter Notebook Instance", task_type=Task.TaskTypes.service)
|
||||||
|
|
||||||
# Add jupyter server base folder
|
# Add jupyter server base folder
|
||||||
|
if Session.check_min_api_version('2.13'):
|
||||||
|
param.pop('user_key', None)
|
||||||
|
param.pop('user_secret', None)
|
||||||
|
param.pop('ssh_password', None)
|
||||||
task.connect(param, name=config_section_name)
|
task.connect(param, name=config_section_name)
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
runtime_prop = dict(task._get_runtime_properties())
|
||||||
|
# remove the user key/secret the moment we have it
|
||||||
|
param['user_key'] = runtime_prop.pop('_user_key', None)
|
||||||
|
param['user_secret'] = runtime_prop.pop('_user_secret', None)
|
||||||
|
# no need to reset, we will need it
|
||||||
|
param['ssh_password'] = runtime_prop.get('_ssh_password')
|
||||||
|
# Force removing properties
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
task._edit(runtime=runtime_prop)
|
||||||
|
task.reload()
|
||||||
|
else:
|
||||||
|
task.connect(param, name=config_section_name)
|
||||||
|
|
||||||
# connect ssh finger print configuration (with fallback if section is missing)
|
# connect ssh finger print configuration (with fallback if section is missing)
|
||||||
old_default_ssh_fingerprint = deepcopy(a_default_ssh_fingerprint)
|
old_default_ssh_fingerprint = deepcopy(a_default_ssh_fingerprint)
|
||||||
try:
|
try:
|
||||||
@ -123,16 +143,17 @@ def setup_os_env(param):
|
|||||||
"_API_SECRET_KEY",
|
"_API_SECRET_KEY",
|
||||||
"_API_HOST_VERIFY_CERT",
|
"_API_HOST_VERIFY_CERT",
|
||||||
"_DOCKER_IMAGE",
|
"_DOCKER_IMAGE",
|
||||||
|
"_DOCKER_BASH_SCRIPT",
|
||||||
)
|
)
|
||||||
# set default docker image, with network configuration
|
# set default docker image, with network configuration
|
||||||
if param.get('default_docker', '').strip():
|
if param.get('default_docker', '').strip():
|
||||||
os.environ["TRAINS_DOCKER_IMAGE"] = param['default_docker'].strip()
|
|
||||||
os.environ["CLEARML_DOCKER_IMAGE"] = param['default_docker'].strip()
|
os.environ["CLEARML_DOCKER_IMAGE"] = param['default_docker'].strip()
|
||||||
|
|
||||||
# setup os environment
|
# setup os environment
|
||||||
env = deepcopy(os.environ)
|
env = deepcopy(os.environ)
|
||||||
for key in os.environ:
|
for key in os.environ:
|
||||||
if (key.startswith("TRAINS") or key.startswith("CLEARML")) and not any(key.endswith(p) for p in preserve):
|
# only set CLEARML_ remove any TRAINS_
|
||||||
|
if key.startswith("TRAINS") or (key.startswith("CLEARML") and not any(key.endswith(p) for p in preserve)):
|
||||||
env.pop(key, None)
|
env.pop(key, None)
|
||||||
|
|
||||||
return env
|
return env
|
||||||
@ -188,8 +209,7 @@ def monitor_jupyter_server(fd, local_filename, process, task, jupyter_port, host
|
|||||||
# we could not locate the token, try again
|
# we could not locate the token, try again
|
||||||
if not token:
|
if not token:
|
||||||
continue
|
continue
|
||||||
# update the task with the correct links and token
|
|
||||||
task.set_parameter(name='properties/jupyter_token', value=str(token))
|
|
||||||
# we ignore the reported port, because jupyter server will get confused
|
# we ignore the reported port, because jupyter server will get confused
|
||||||
# if we have multiple servers running and will point to the wrong port/server
|
# if we have multiple servers running and will point to the wrong port/server
|
||||||
task.set_parameter(name='properties/jupyter_port', value=str(jupyter_port))
|
task.set_parameter(name='properties/jupyter_port', value=str(jupyter_port))
|
||||||
@ -197,9 +217,21 @@ def monitor_jupyter_server(fd, local_filename, process, task, jupyter_port, host
|
|||||||
'https' if "https://" in line else 'http',
|
'https' if "https://" in line else 'http',
|
||||||
hostnames, jupyter_port, token
|
hostnames, jupyter_port, token
|
||||||
)
|
)
|
||||||
print('\nJupyter Lab URL: {}\n'.format(jupyter_url))
|
|
||||||
|
# update the task with the correct links and token
|
||||||
|
if Session.check_min_api_version("2.13"):
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
runtime_prop = task._get_runtime_properties()
|
||||||
|
runtime_prop['_jupyter_token'] = str(token)
|
||||||
|
runtime_prop['_jupyter_url'] = str(jupyter_url)
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
task._set_runtime_properties(runtime_prop)
|
||||||
|
else:
|
||||||
|
task.set_parameter(name='properties/jupyter_token', value=str(token))
|
||||||
task.set_parameter(name='properties/jupyter_url', value=jupyter_url)
|
task.set_parameter(name='properties/jupyter_url', value=jupyter_url)
|
||||||
|
|
||||||
|
print('\nJupyter Lab URL: {}\n'.format(jupyter_url))
|
||||||
|
|
||||||
# cleanup
|
# cleanup
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
@ -219,8 +251,8 @@ def start_vscode_server(hostname, hostnames, param, task, env):
|
|||||||
|
|
||||||
# get vscode version and python extension version
|
# get vscode version and python extension version
|
||||||
# they are extremely flaky, this combination works, most do not.
|
# they are extremely flaky, this combination works, most do not.
|
||||||
vscode_version = '3.9.2'
|
vscode_version = '3.12.0'
|
||||||
python_ext_version = '2021.3.658691958'
|
python_ext_version = '2021.10.1365161279'
|
||||||
if param.get("vscode_version"):
|
if param.get("vscode_version"):
|
||||||
vscode_version_parts = param.get("vscode_version").split(':')
|
vscode_version_parts = param.get("vscode_version").split(':')
|
||||||
vscode_version = vscode_version_parts[0]
|
vscode_version = vscode_version_parts[0]
|
||||||
@ -231,10 +263,22 @@ def start_vscode_server(hostname, hostnames, param, task, env):
|
|||||||
env = dict(**env)
|
env = dict(**env)
|
||||||
env.pop('PYTHONPATH', None)
|
env.pop('PYTHONPATH', None)
|
||||||
|
|
||||||
|
pre_installed = False
|
||||||
|
python_ext = None
|
||||||
|
|
||||||
# find a free tcp port
|
# find a free tcp port
|
||||||
port = get_free_port(9000, 9100)
|
port = get_free_port(9000, 9100)
|
||||||
|
|
||||||
if os.geteuid() == 0:
|
if os.geteuid() == 0:
|
||||||
|
# check if preinstalled
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
vscode_path = subprocess.check_output('which code-server', shell=True).decode().strip()
|
||||||
|
pre_installed = bool(vscode_path)
|
||||||
|
except Exception:
|
||||||
|
vscode_path = None
|
||||||
|
|
||||||
|
if not vscode_path:
|
||||||
# installing VSCODE:
|
# installing VSCODE:
|
||||||
try:
|
try:
|
||||||
python_ext = StorageManager.get_local_copy(
|
python_ext = StorageManager.get_local_copy(
|
||||||
@ -252,6 +296,7 @@ def start_vscode_server(hostname, hostnames, param, task, env):
|
|||||||
vscode_path = 'code-server'
|
vscode_path = 'code-server'
|
||||||
else:
|
else:
|
||||||
python_ext = None
|
python_ext = None
|
||||||
|
pre_installed = True
|
||||||
# check if code-server exists
|
# check if code-server exists
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
@ -280,6 +325,14 @@ def start_vscode_server(hostname, hostnames, param, task, env):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
fd, local_filename = mkstemp()
|
fd, local_filename = mkstemp()
|
||||||
|
if pre_installed:
|
||||||
|
user_folder = os.path.expanduser("~/.local/share/code-server/")
|
||||||
|
if not os.path.isdir(user_folder):
|
||||||
|
user_folder = None
|
||||||
|
exts_folder = None
|
||||||
|
else:
|
||||||
|
exts_folder = os.path.expanduser("~/.local/share/code-server/extensions/")
|
||||||
|
else:
|
||||||
subprocess.Popen(
|
subprocess.Popen(
|
||||||
[
|
[
|
||||||
vscode_path,
|
vscode_path,
|
||||||
@ -291,11 +344,13 @@ def start_vscode_server(hostname, hostnames, param, task, env):
|
|||||||
"--extensions-dir", exts_folder,
|
"--extensions-dir", exts_folder,
|
||||||
"--install-extension", "ms-toolsai.jupyter",
|
"--install-extension", "ms-toolsai.jupyter",
|
||||||
# "--install-extension", "donjayamanne.python-extension-pack"
|
# "--install-extension", "donjayamanne.python-extension-pack"
|
||||||
] + ["--install-extension", python_ext] if python_ext else [],
|
] + ["--install-extension", "ms-python.python@{}".format(python_ext_version)] if python_ext else [],
|
||||||
env=env,
|
env=env,
|
||||||
stdout=fd,
|
stdout=fd,
|
||||||
stderr=fd,
|
stderr=fd,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if user_folder:
|
||||||
settings = Path(os.path.expanduser(os.path.join(user_folder, 'User/settings.json')))
|
settings = Path(os.path.expanduser(os.path.join(user_folder, 'User/settings.json')))
|
||||||
settings.parent.mkdir(parents=True, exist_ok=True)
|
settings.parent.mkdir(parents=True, exist_ok=True)
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
@ -316,15 +371,19 @@ def start_vscode_server(hostname, hostnames, param, task, env):
|
|||||||
json.dump(base_json, f)
|
json.dump(base_json, f)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
proc = subprocess.Popen(
|
proc = subprocess.Popen(
|
||||||
['bash', '-c',
|
['bash', '-c',
|
||||||
'{} --auth none --bind-addr 127.0.0.1:{} --disable-update-check '
|
'{} --auth none --bind-addr 127.0.0.1:{} --disable-update-check {} {}'.format(
|
||||||
'--user-data-dir {} --extensions-dir {}'.format(vscode_path, port, user_folder, exts_folder)],
|
vscode_path, port,
|
||||||
|
'--user-data-dir \"{}\"'.format(user_folder) if user_folder else '',
|
||||||
|
'--extensions-dir \"{}\"'.format(exts_folder) if exts_folder else '')],
|
||||||
env=env,
|
env=env,
|
||||||
stdout=fd,
|
stdout=fd,
|
||||||
stderr=fd,
|
stderr=fd,
|
||||||
cwd=cwd,
|
cwd=cwd,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
error_code = proc.wait(timeout=1)
|
error_code = proc.wait(timeout=1)
|
||||||
raise ValueError("code-server failed starting, return code {}".format(error_code))
|
raise ValueError("code-server failed starting, return code {}".format(error_code))
|
||||||
@ -343,7 +402,7 @@ def start_jupyter_server(hostname, hostnames, param, task, env):
|
|||||||
print('no jupyterlab to monitor - going to sleep')
|
print('no jupyterlab to monitor - going to sleep')
|
||||||
while True:
|
while True:
|
||||||
sleep(10.)
|
sleep(10.)
|
||||||
return
|
return # noqa
|
||||||
|
|
||||||
# execute jupyter notebook
|
# execute jupyter notebook
|
||||||
fd, local_filename = mkstemp()
|
fd, local_filename = mkstemp()
|
||||||
@ -418,15 +477,15 @@ def setup_ssh_server(hostname, hostnames, param, task):
|
|||||||
"&& " # noqa: W605
|
"&& " # noqa: W605
|
||||||
"echo 'ClientAliveInterval 10' >> /etc/ssh/sshd_config && "
|
"echo 'ClientAliveInterval 10' >> /etc/ssh/sshd_config && "
|
||||||
"echo 'ClientAliveCountMax 20' >> /etc/ssh/sshd_config && "
|
"echo 'ClientAliveCountMax 20' >> /etc/ssh/sshd_config && "
|
||||||
"echo 'AcceptEnv TRAINS_API_ACCESS_KEY TRAINS_API_SECRET_KEY "
|
"echo 'AcceptEnv CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY "
|
||||||
"CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY' >> /etc/ssh/sshd_config && "
|
"CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY' >> /etc/ssh/sshd_config && "
|
||||||
'echo "export VISIBLE=now" >> /etc/profile && '
|
'echo "export VISIBLE=now" >> /etc/profile && '
|
||||||
'echo "export PATH=$PATH" >> /etc/profile && '
|
'echo "export PATH=$PATH" >> /etc/profile && '
|
||||||
'echo "ldconfig" >> /etc/profile && '
|
'echo "ldconfig" 2>/dev/null >> /etc/profile && '
|
||||||
'echo "export TRAINS_CONFIG_FILE={trains_config_file}" >> /etc/profile'.format(
|
'echo "export CLEARML_CONFIG_FILE={trains_config_file}" >> /etc/profile'.format(
|
||||||
password=ssh_password,
|
password=ssh_password,
|
||||||
port=port,
|
port=port,
|
||||||
trains_config_file=os.environ.get("CLEARML_CONFIG_FILE") or os.environ.get("TRAINS_CONFIG_FILE"),
|
trains_config_file=os.environ.get("CLEARML_CONFIG_FILE") or os.environ.get("CLEARML_CONFIG_FILE"),
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
sshd_path = '/usr/sbin/sshd'
|
sshd_path = '/usr/sbin/sshd'
|
||||||
@ -449,7 +508,7 @@ def setup_ssh_server(hostname, hostnames, param, task):
|
|||||||
"UsePAM yes" + "\n"\
|
"UsePAM yes" + "\n"\
|
||||||
"AuthorizedKeysFile {}".format(os.path.join(ssh_config_path, 'authorized_keys')) + "\n"\
|
"AuthorizedKeysFile {}".format(os.path.join(ssh_config_path, 'authorized_keys')) + "\n"\
|
||||||
"PidFile {}".format(os.path.join(ssh_config_path, 'sshd.pid')) + "\n"\
|
"PidFile {}".format(os.path.join(ssh_config_path, 'sshd.pid')) + "\n"\
|
||||||
"AcceptEnv TRAINS_API_ACCESS_KEY TRAINS_API_SECRET_KEY "\
|
"AcceptEnv CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY "\
|
||||||
"CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY"+"\n"
|
"CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY"+"\n"
|
||||||
for k in default_ssh_fingerprint:
|
for k in default_ssh_fingerprint:
|
||||||
filename = os.path.join(ssh_config_path, '{}'.format(k.replace('__pub', '.pub')))
|
filename = os.path.join(ssh_config_path, '{}'.format(k.replace('__pub', '.pub')))
|
||||||
@ -511,13 +570,42 @@ def setup_ssh_server(hostname, hostnames, param, task):
|
|||||||
print("Error: {}\n\n#\n# Error: SSH server could not be launched\n#\n".format(ex))
|
print("Error: {}\n\n#\n# Error: SSH server could not be launched\n#\n".format(ex))
|
||||||
|
|
||||||
|
|
||||||
|
def _b64_decode_file(encoded_string):
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
import gzip
|
||||||
|
value = gzip.decompress(base64.decodebytes(encoded_string.encode('ascii'))).decode('utf8')
|
||||||
|
return value
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def setup_user_env(param, task):
|
def setup_user_env(param, task):
|
||||||
env = setup_os_env(param)
|
env = setup_os_env(param)
|
||||||
|
|
||||||
|
# apply vault if we have it
|
||||||
|
vault_environment = {}
|
||||||
|
if param.get("user_key") and param.get("user_secret"):
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
print('Applying vault configuration')
|
||||||
|
from clearml.backend_api.session.defs import ENV_ENABLE_ENV_CONFIG_SECTION, ENV_ENABLE_FILES_CONFIG_SECTION
|
||||||
|
prev_env, prev_files = ENV_ENABLE_ENV_CONFIG_SECTION.get(), ENV_ENABLE_FILES_CONFIG_SECTION.get()
|
||||||
|
ENV_ENABLE_ENV_CONFIG_SECTION.set(True), ENV_ENABLE_FILES_CONFIG_SECTION.set(True)
|
||||||
|
prev_envs = deepcopy(os.environ)
|
||||||
|
Session(api_key=param.get("user_key"), secret_key=param.get("user_secret"))
|
||||||
|
vault_environment = {k: v for k, v in os.environ.items() if prev_envs.get(k) != v}
|
||||||
|
ENV_ENABLE_ENV_CONFIG_SECTION.set(prev_env), ENV_ENABLE_FILES_CONFIG_SECTION.set(prev_files)
|
||||||
|
if vault_environment:
|
||||||
|
print('Vault environment added: {}'.format(list(vault_environment.keys())))
|
||||||
|
except Exception as ex:
|
||||||
|
print('Applying vault configuration failed: {}'.format(ex))
|
||||||
|
|
||||||
# do not change user bash/profile
|
# do not change user bash/profile
|
||||||
if os.geteuid() != 0:
|
if os.geteuid() != 0:
|
||||||
if param.get("user_key") and param.get("user_secret"):
|
if param.get("user_key") and param.get("user_secret"):
|
||||||
env['TRAINS_API_ACCESS_KEY'] = param.get("user_key")
|
env['CLEARML_API_ACCESS_KEY'] = param.get("user_key")
|
||||||
env['TRAINS_API_SECRET_KEY'] = param.get("user_secret")
|
env['CLEARML_API_SECRET_KEY'] = param.get("user_secret")
|
||||||
return env
|
return env
|
||||||
|
|
||||||
# create symbolic link to the venv
|
# create symbolic link to the venv
|
||||||
@ -530,20 +618,24 @@ def setup_user_env(param, task):
|
|||||||
pass
|
pass
|
||||||
# set default user credentials
|
# set default user credentials
|
||||||
if param.get("user_key") and param.get("user_secret"):
|
if param.get("user_key") and param.get("user_secret"):
|
||||||
os.system("echo 'export TRAINS_API_ACCESS_KEY=\"{}\"' >> ~/.bashrc".format(
|
os.system("echo 'export CLEARML_API_ACCESS_KEY=\"{}\"' >> ~/.bashrc".format(
|
||||||
param.get("user_key", "").replace('$', '\\$')))
|
param.get("user_key", "").replace('$', '\\$')))
|
||||||
os.system("echo 'export TRAINS_API_SECRET_KEY=\"{}\"' >> ~/.bashrc".format(
|
os.system("echo 'export CLEARML_API_SECRET_KEY=\"{}\"' >> ~/.bashrc".format(
|
||||||
param.get("user_secret", "").replace('$', '\\$')))
|
param.get("user_secret", "").replace('$', '\\$')))
|
||||||
os.system("echo 'export TRAINS_DOCKER_IMAGE=\"{}\"' >> ~/.bashrc".format(
|
os.system("echo 'export CLEARML_DOCKER_IMAGE=\"{}\"' >> ~/.bashrc".format(
|
||||||
param.get("default_docker", "").strip() or env.get('TRAINS_DOCKER_IMAGE', '')))
|
param.get("default_docker", "").strip() or env.get('CLEARML_DOCKER_IMAGE', '')))
|
||||||
os.system("echo 'export TRAINS_API_ACCESS_KEY=\"{}\"' >> ~/.profile".format(
|
os.system("echo 'export CLEARML_API_ACCESS_KEY=\"{}\"' >> ~/.profile".format(
|
||||||
param.get("user_key", "").replace('$', '\\$')))
|
param.get("user_key", "").replace('$', '\\$')))
|
||||||
os.system("echo 'export TRAINS_API_SECRET_KEY=\"{}\"' >> ~/.profile".format(
|
os.system("echo 'export CLEARML_API_SECRET_KEY=\"{}\"' >> ~/.profile".format(
|
||||||
param.get("user_secret", "").replace('$', '\\$')))
|
param.get("user_secret", "").replace('$', '\\$')))
|
||||||
os.system("echo 'export TRAINS_DOCKER_IMAGE=\"{}\"' >> ~/.profile".format(
|
os.system("echo 'export CLEARML_DOCKER_IMAGE=\"{}\"' >> ~/.profile".format(
|
||||||
param.get("default_docker", "").strip() or env.get('TRAINS_DOCKER_IMAGE', '')))
|
param.get("default_docker", "").strip() or env.get('CLEARML_DOCKER_IMAGE', '')))
|
||||||
env['TRAINS_API_ACCESS_KEY'] = param.get("user_key")
|
for k, v in vault_environment.items():
|
||||||
env['TRAINS_API_SECRET_KEY'] = param.get("user_secret")
|
os.system("echo 'export {}=\"{}\"' >> ~/.profile".format(k, v))
|
||||||
|
os.system("echo 'export {}=\"{}\"' >> ~/.bashrc".format(k, v))
|
||||||
|
env[k] = str(v) if v else ""
|
||||||
|
env['CLEARML_API_ACCESS_KEY'] = param.get("user_key")
|
||||||
|
env['CLEARML_API_SECRET_KEY'] = param.get("user_secret")
|
||||||
# set default folder for user
|
# set default folder for user
|
||||||
if param.get("user_base_directory"):
|
if param.get("user_base_directory"):
|
||||||
base_dir = param.get("user_base_directory")
|
base_dir = param.get("user_base_directory")
|
||||||
@ -557,8 +649,27 @@ def setup_user_env(param, task):
|
|||||||
os.system("echo '. {}' >> ~/.profile".format(os.path.join(environment, 'bin', 'activate')))
|
os.system("echo '. {}' >> ~/.profile".format(os.path.join(environment, 'bin', 'activate')))
|
||||||
|
|
||||||
# check if we need to create .git-credentials
|
# check if we need to create .git-credentials
|
||||||
|
|
||||||
|
runtime_property_support = Session.check_min_api_version("2.13")
|
||||||
|
if runtime_property_support:
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
runtime_prop = dict(task._get_runtime_properties())
|
||||||
|
git_credentials = runtime_prop.pop('_git_credentials', None)
|
||||||
|
git_config = runtime_prop.pop('_git_config', None)
|
||||||
|
# force removing properties
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
task._edit(runtime=runtime_prop)
|
||||||
|
task.reload()
|
||||||
|
if git_credentials is not None:
|
||||||
|
git_credentials = _b64_decode_file(git_credentials)
|
||||||
|
if git_config is not None:
|
||||||
|
git_config = _b64_decode_file(git_config)
|
||||||
|
else:
|
||||||
# noinspection PyProtectedMember
|
# noinspection PyProtectedMember
|
||||||
git_credentials = task._get_configuration_text('git_credentials')
|
git_credentials = task._get_configuration_text('git_credentials')
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
git_config = task._get_configuration_text('git_config')
|
||||||
|
|
||||||
if git_credentials:
|
if git_credentials:
|
||||||
git_cred_file = os.path.expanduser('~/.config/git/credentials')
|
git_cred_file = os.path.expanduser('~/.config/git/credentials')
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
@ -568,8 +679,7 @@ def setup_user_env(param, task):
|
|||||||
f.write(git_credentials)
|
f.write(git_credentials)
|
||||||
except Exception:
|
except Exception:
|
||||||
print('Could not write {} file'.format(git_cred_file))
|
print('Could not write {} file'.format(git_cred_file))
|
||||||
# noinspection PyProtectedMember
|
|
||||||
git_config = task._get_configuration_text('git_config')
|
|
||||||
if git_config:
|
if git_config:
|
||||||
git_config_file = os.path.expanduser('~/.config/git/config')
|
git_config_file = os.path.expanduser('~/.config/git/config')
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
|
@ -1 +1 @@
|
|||||||
__version__ = '0.3.4'
|
__version__ = '0.3.5'
|
||||||
|
@ -1,3 +1,3 @@
|
|||||||
clearml
|
clearml >= 1.1.5
|
||||||
pexpect ; sys_platform != 'win32'
|
pexpect ; sys_platform != 'win32'
|
||||||
wexpect ; sys_platform == 'win32'
|
wexpect ; sys_platform == 'win32'
|
||||||
|
Loading…
Reference in New Issue
Block a user