Change --debug to --verbose and added remote session setup progress print by default

Update default vscode_version to 3.12.0 and  python_ext_version to 2021.10.1365161279
Add message status reporting, fix k8s support
Add support for Task runtime properties
This commit is contained in:
allegroai 2021-11-18 10:43:46 +02:00
parent ae09bf3bed
commit 9c14f627b1
2 changed files with 271 additions and 93 deletions

View File

@ -1,3 +1,4 @@
import base64
import hashlib import hashlib
import json import json
import logging import logging
@ -20,6 +21,7 @@ import psutil
from clearml import Task from clearml import Task
from clearml.backend_api.session.client import APIClient from clearml.backend_api.session.client import APIClient
from clearml.config import config_obj from clearml.config import config_obj
from clearml.backend_api import Session
from .tcp_proxy import TcpProxy from .tcp_proxy import TcpProxy
from .single_thread_proxy import SingleThreadProxy from .single_thread_proxy import SingleThreadProxy
@ -136,35 +138,59 @@ def create_base_task(state, project_name=None, task_name=None):
task = Task.create(project_name=project_name or 'DevOps', task = Task.create(project_name=project_name or 'DevOps',
task_name=task_name or 'Interactive Session', task_name=task_name or 'Interactive Session',
task_type=Task.TaskTypes.application) task_type=Task.TaskTypes.application)
task_state = task.export_task() task_script = task.data.script.to_dict()
base_script_file = os.path.abspath(os.path.join(__file__, '..', 'tcp_proxy.py')) base_script_file = os.path.abspath(os.path.join(__file__, '..', 'tcp_proxy.py'))
with open(base_script_file, 'rt') as f: with open(base_script_file, 'rt') as f:
task_state['script']['diff'] = f.read() task_script['diff'] = f.read()
base_script_file = os.path.abspath(os.path.join(__file__, '..', 'interactive_session_task.py')) base_script_file = os.path.abspath(os.path.join(__file__, '..', 'interactive_session_task.py'))
with open(base_script_file, 'rt') as f: with open(base_script_file, 'rt') as f:
task_state['script']['diff'] += '\n\n' + f.read() task_script['diff'] += '\n\n' + f.read()
task_state['script']['working_dir'] = '.' task_script['working_dir'] = '.'
task_state['script']['entry_point'] = 'interactive_session.py' task_script['entry_point'] = 'interactive_session.py'
task_state['script']['requirements'] = {'pip': '\n'.join( task_script['requirements'] = {'pip': '\n'.join(
["clearml"] + (["jupyter", "jupyterlab", "jupyterlab_git"] if state.get('jupyter_lab') else []) + ["clearml"] + (["jupyter", "jupyterlab", "jupyterlab_git"] if state.get('jupyter_lab') else []) +
(['pylint'] if state.get('vscode_server') else []))} (['pylint'] if state.get('vscode_server') else []))}
task.update_task(task_state)
section, _, _ = _get_config_section_name() section, _, _ = _get_config_section_name()
task.set_parameters({
"{}/user_base_directory".format(section): "~/", if Session.check_min_api_version('2.13'):
"{}/ssh_server".format(section): True, _runtime_prop = dict(task._get_runtime_properties())
"{}/ssh_password".format(section): "training", _runtime_prop.update({
"{}/default_docker".format(section): "nvidia/cuda", "_user_key": '',
"{}/user_key".format(section): '', "_user_secret": '',
"{}/user_secret".format(section): '', "_jupyter_token": '',
"properties/external_address": '', "_ssh_password": "training",
"properties/internal_ssh_port": '', })
"properties/jupyter_token": '', # noinspection PyProtectedMember
"properties/jupyter_port": '', task._set_runtime_properties(_runtime_prop)
}) task.set_parameters({
"{}/user_base_directory".format(section): "~/",
"{}/ssh_server".format(section): True,
"{}/default_docker".format(section): "nvidia/cuda",
"properties/external_address": '',
"properties/internal_ssh_port": '',
"properties/jupyter_port": '',
})
else:
task.set_parameters({
"{}/user_base_directory".format(section): "~/",
"{}/ssh_server".format(section): True,
"{}/ssh_password".format(section): "training",
"{}/default_docker".format(section): "nvidia/cuda",
"{}/user_key".format(section): '',
"{}/user_secret".format(section): '',
"properties/external_address": '',
"properties/internal_ssh_port": '',
"properties/jupyter_token": '',
"properties/jupyter_port": '',
})
task.set_system_tags([system_tag]) task.set_system_tags([system_tag])
task.reset(force=True)
# only update the data at the end, so reload requests are smaller
# noinspection PyProtectedMember
task._edit(script=task_script)
return task return task
@ -197,18 +223,38 @@ def create_debugging_task(state, debug_task_id):
(['pylint'] if state.get('vscode_server') else []) (['pylint'] if state.get('vscode_server') else [])
task.update_task(task_state) task.update_task(task_state)
section, _, _ = _get_config_section_name() section, _, _ = _get_config_section_name()
task.set_parameters({
"{}/user_base_directory".format(section): "~/", if Session.check_min_api_version('2.13'):
"{}/ssh_server".format(section): True, _runtime_prop = dict(task._get_runtime_properties())
"{}/ssh_password".format(section): "training", _runtime_prop.update({
"{}/default_docker".format(section): "nvidia/cuda", "_user_key": '',
"{}/user_key".format(section): '', "_user_secret": '',
"{}/user_secret".format(section): '', "_jupyter_token": '',
"properties/external_address": '', "_ssh_password": "training",
"properties/internal_ssh_port": '', })
"properties/jupyter_token": '', # noinspection PyProtectedMember
"properties/jupyter_port": '', task._set_runtime_properties(_runtime_prop)
}) task.set_parameters({
"{}/user_base_directory".format(section): "~/",
"{}/ssh_server".format(section): True,
"{}/default_docker".format(section): "nvidia/cuda",
"properties/external_address": '',
"properties/internal_ssh_port": '',
"properties/jupyter_port": '',
})
else:
task.set_parameters({
"{}/user_base_directory".format(section): "~/",
"{}/ssh_server".format(section): True,
"{}/ssh_password".format(section): "training",
"{}/default_docker".format(section): "nvidia/cuda",
"{}/user_key".format(section): '',
"{}/user_secret".format(section): '',
"properties/external_address": '',
"properties/internal_ssh_port": '',
"properties/jupyter_token": '',
"properties/jupyter_port": '',
})
task.set_system_tags([system_tag]) task.set_system_tags([system_tag])
task.reset(force=True) task.reset(force=True)
return task return task
@ -265,6 +311,17 @@ def _get_user_id(client):
return current_user_id return current_user_id
def _b64_encode_file(file):
# noinspection PyBroadException
try:
import gzip
with open(file, 'rt') as f:
git_credentials = gzip.compress(f.read().encode('utf8'))
return base64.encodebytes(git_credentials).decode('ascii')
except Exception:
return None
def get_project_id(state): def get_project_id(state):
project_id = None project_id = None
project_name = state.get('project') or None project_name = state.get('project') or None
@ -383,8 +440,8 @@ def load_state(state_file):
state = json.load(f) state = json.load(f)
except Exception: except Exception:
state = {} state = {}
# never reload --debug state # never reload --verbose state
state.pop('debug', None) state.pop('verbose', None)
return state return state
@ -402,13 +459,31 @@ def clone_task(state, project_id):
task = create_base_task(state, project_name=state.get('project')) task = create_base_task(state, project_name=state.get('project'))
new_task = True new_task = True
print('Configuring new session')
runtime_prop_support = Session.check_min_api_version("2.13")
if runtime_prop_support:
# noinspection PyProtectedMember
runtime_properties = dict(task._get_runtime_properties() or {})
runtime_properties['_jupyter_token'] = ''
runtime_properties['_ssh_password'] = str(state['password'])
runtime_properties['_user_key'] = str(config_obj.get("api.credentials.access_key"))
runtime_properties['_user_secret'] = (config_obj.get("api.credentials.secret_key"))
# noinspection PyProtectedMember
task._set_runtime_properties(runtime_properties)
task_params = task.get_parameters(backwards_compatibility=False) task_params = task.get_parameters(backwards_compatibility=False)
if 'General/ssh_server' in task_params: if 'General/ssh_server' in task_params:
section = 'General' section = 'General'
init_section = 'init_script' init_section = 'init_script'
else: else:
section, _, init_section = _get_config_section_name() section, _, init_section = _get_config_section_name()
task_params['properties/jupyter_token'] = ''
if not runtime_prop_support:
task_params['properties/jupyter_token'] = ''
task_params['{}/ssh_password'.format(section)] = state['password']
task_params['{}/user_key'.format(section)] = config_obj.get("api.credentials.access_key")
task_params['{}/user_secret'.format(section)] = config_obj.get("api.credentials.secret_key")
task_params['properties/jupyter_port'] = '' task_params['properties/jupyter_port'] = ''
if state.get('remote_gateway') is not None: if state.get('remote_gateway') is not None:
remote_gateway_parts = str(state.get('remote_gateway')).split(':') remote_gateway_parts = str(state.get('remote_gateway')).split(':')
@ -416,9 +491,6 @@ def clone_task(state, project_id):
if len(remote_gateway_parts) > 1: if len(remote_gateway_parts) > 1:
task_params['properties/external_ssh_port'] = remote_gateway_parts[1] task_params['properties/external_ssh_port'] = remote_gateway_parts[1]
task_params['{}/ssh_server'.format(section)] = str(True) task_params['{}/ssh_server'.format(section)] = str(True)
task_params['{}/ssh_password'.format(section)] = state['password']
task_params['{}/user_key'.format(section)] = config_obj.get("api.credentials.access_key")
task_params['{}/user_secret'.format(section)] = config_obj.get("api.credentials.secret_key")
task_params["{}/jupyterlab".format(section)] = bool(state.get('jupyter_lab')) task_params["{}/jupyterlab".format(section)] = bool(state.get('jupyter_lab'))
task_params["{}/vscode_server".format(section)] = bool(state.get('vscode_server')) task_params["{}/vscode_server".format(section)] = bool(state.get('vscode_server'))
task_params["{}/public_ip".format(section)] = bool(state.get('public_ip')) task_params["{}/public_ip".format(section)] = bool(state.get('public_ip'))
@ -443,13 +515,30 @@ def clone_task(state, project_id):
# store the .git-credentials # store the .git-credentials
if state.get('git_credentials'): if state.get('git_credentials'):
git_cred_file = os.path.join(os.path.expanduser('~'), '.git-credentials') git_cred_file = os.path.join(os.path.expanduser('~'), '.git-credentials')
if os.path.isfile(git_cred_file):
task.connect_configuration(
configuration=git_cred_file, name='git_credentials', description='git credentials')
git_conf_file = os.path.join(os.path.expanduser('~'), '.gitconfig') git_conf_file = os.path.join(os.path.expanduser('~'), '.gitconfig')
if os.path.isfile(git_conf_file): if not os.path.isfile(git_cred_file):
task.connect_configuration( git_cred_file = None
configuration=git_conf_file, name='git_config', description='git config') if not os.path.isfile(git_conf_file):
git_conf_file = None
if runtime_prop_support:
# noinspection PyProtectedMember
runtime_properties = dict(task._get_runtime_properties() or {})
if git_cred_file:
runtime_properties['_git_credentials'] = _b64_encode_file(git_cred_file)
if git_conf_file:
runtime_properties['_git_config'] = _b64_encode_file(git_conf_file)
# store back
if git_cred_file or git_conf_file:
# noinspection PyProtectedMember
task._set_runtime_properties(runtime_properties)
else:
if git_cred_file:
task.connect_configuration(
configuration=git_cred_file, name='git_credentials', description='git credentials')
if git_conf_file:
task.connect_configuration(
configuration=git_conf_file, name='git_config', description='git config')
if state.get('packages'): if state.get('packages'):
requirements = task.data.script.requirements or {} requirements = task.data.script.requirements or {}
@ -476,21 +565,28 @@ def wait_for_machine(state, task):
# wait until task is running # wait until task is running
print('Waiting for remote machine allocation [id={}]'.format(task.id)) print('Waiting for remote machine allocation [id={}]'.format(task.id))
last_status = None last_status = None
while last_status != 'in_progress' and last_status in (None, 'created', 'queued', 'unknown',): last_message = None
stopped_counter = 0
while last_status != 'in_progress' and last_status in (None, 'created', 'queued', 'unknown', 'stopped'):
print('.', end='', flush=True) print('.', end='', flush=True)
if last_status is not None: if last_status is not None:
sleep(2.) sleep(2.)
status = task.get_status() stopped_counter = (stopped_counter+1) if last_status == 'stopped' else 0
if last_status != status: if stopped_counter > 5:
break
# noinspection PyProtectedMember
status, message = task._get_status()
status = str(status)
if last_status != status or last_message != message:
# noinspection PyProtectedMember # noinspection PyProtectedMember
last_status = task._get_status()[1] print('Status [{}]{} {}'.format(status, ' - {}'.format(last_status) if last_status else '', message))
print('Status [{}]{}'.format(status, ' - {}'.format(last_status) if last_status else ''))
last_status = status last_status = status
last_message = message
print('Remote machine allocated') print('Remote machine allocated')
print('Setting remote environment [Task id={}]'.format(task.id)) print('Setting remote environment [Task id={}]'.format(task.id))
print('Setup process details: {}'.format(task.get_output_log_web_page())) print('Setup process details: {}'.format(task.get_output_log_web_page()))
print('Waiting for environment setup to complete [usually about 20-30 seconds]') print('Waiting for environment setup to complete [usually about 20-30 seconds, see last log line/s below]')
# monitor progress, until we get the new jupyter, then we know it is working # monitor progress, until we get the new jupyter, then we know it is working
task.reload() task.reload()
@ -513,24 +609,38 @@ def wait_for_machine(state, task):
last_lines = [] last_lines = []
period_counter = 0 period_counter = 0
while any(bool(not task.get_parameter(p)) for p in wait_properties) and task.get_status() == 'in_progress': while any(bool(not task.get_parameter(p)) for p in wait_properties) and task.get_status() == 'in_progress':
lines = task.get_reported_console_output(10) if state.get('debug') else [] lines = task.get_reported_console_output(10 if state.get('verbose') else 1)
if last_lines != lines: if last_lines != lines:
# new line if we had '.' counter in the previous run # new line if we had '.' counter in the previous run
if period_counter: if period_counter:
print('') if state.get('verbose'):
print('')
period_counter = 0 period_counter = 0
try: try:
index = next(i for i, line in enumerate(lines) if last_lines and line == last_lines[-1]) index = next(i for i, line in enumerate(lines) if last_lines and line == last_lines[-1])
print('> ' + ''.join(lines[index+1:]).rstrip().replace('\n', '\n> ')) print_line = '> ' + ''.join(lines[index+1:]).rstrip().replace('\n', '\n> ')
except StopIteration: except StopIteration:
print('> ' + ''.join(lines).rstrip().replace('\n', '\n> ')) print_line = '> ' + ''.join(lines).rstrip().replace('\n', '\n> ')
if state.get('verbose'):
print(print_line)
else:
print_line = [l for l in print_line.split('\n') if l.rstrip()]
if print_line:
print('\r' + print_line[-1], end='', flush=True)
last_lines = lines last_lines = lines
else: else:
print('.', end='', flush=True)
period_counter += 1 period_counter += 1
print(('' if state.get('verbose') else '\r') + '.'*period_counter, end='', flush=True)
sleep(3.) sleep(3.)
task.reload() task.reload()
# clear the line
if not state.get('verbose'):
print('\r ', end='', flush=True)
print('\n')
if task.get_status() != 'in_progress': if task.get_status() != 'in_progress':
raise ValueError("Remote setup failed (status={}) see details: {}".format( raise ValueError("Remote setup failed (status={}) see details: {}".format(
task.get_status(), task.get_output_log_web_page())) task.get_status(), task.get_output_log_web_page()))
@ -628,14 +738,21 @@ def monitor_ssh_tunnel(state, task):
if not all([ssh_port, jupyter_token, jupyter_port, internal_ssh_port, ssh_password, remote_address]): if not all([ssh_port, jupyter_token, jupyter_port, internal_ssh_port, ssh_password, remote_address]):
task.reload() task.reload()
task_parameters = task.get_parameters() task_parameters = task.get_parameters()
section = 'General' if 'General/ssh_server' in task_parameters else default_section if Session.check_min_api_version("2.13"):
# noinspection PyProtectedMember
runtime_prop = task._get_runtime_properties()
ssh_password = runtime_prop.get('_ssh_password') or state.get('password', '')
jupyter_token = runtime_prop.get('_jupyter_token')
else:
section = 'General' if 'General/ssh_server' in task_parameters else default_section
ssh_password = task_parameters.get('{}/ssh_password'.format(section)) or state.get('password', '')
jupyter_token = task_parameters.get('properties/jupyter_token')
remote_address = \ remote_address = \
task_parameters.get('properties/k8s-gateway-address') or \ task_parameters.get('properties/k8s-gateway-address') or \
task_parameters.get('properties/external_address') task_parameters.get('properties/external_address')
ssh_password = task_parameters.get('{}/ssh_password'.format(section)) or state.get('password', '')
internal_ssh_port = task_parameters.get('properties/internal_ssh_port') internal_ssh_port = task_parameters.get('properties/internal_ssh_port')
jupyter_port = task_parameters.get('properties/jupyter_port') jupyter_port = task_parameters.get('properties/jupyter_port')
jupyter_token = task_parameters.get('properties/jupyter_token')
ssh_port = \ ssh_port = \
task_parameters.get('properties/k8s-pod-port') or \ task_parameters.get('properties/k8s-pod-port') or \
task_parameters.get('properties/external_ssh_port') or internal_ssh_port task_parameters.get('properties/external_ssh_port') or internal_ssh_port
@ -671,7 +788,7 @@ def monitor_ssh_tunnel(state, task):
state.get('username') or 'root', state.get('username') or 'root',
remote_address, ssh_port, ssh_password, remote_address, ssh_port, ssh_password,
local_remote_pair_list=local_remote_pair_list, local_remote_pair_list=local_remote_pair_list,
debug=state.get('debug', False), debug=state.get('verbose', False),
) )
if ssh_process and ssh_process.isalive(): if ssh_process and ssh_process.isalive():
@ -826,8 +943,9 @@ def setup_parser(parser):
parser.add_argument('--username', type=str, default=None, parser.add_argument('--username', type=str, default=None,
help='Advanced: Select ssh username for the interactive session ' help='Advanced: Select ssh username for the interactive session '
'(default: `root` or previously used one)') '(default: `root` or previously used one)')
parser.add_argument('--debug', action='store_true', default=None, parser.add_argument('--verbose', action='store_true', default=None,
help='Advanced: If set, print debugging information') help='Advanced: If set, print verbose progress information, '
'e.g. the remote machine setup process log')
def get_version(): def get_version():
@ -862,8 +980,8 @@ def cli():
state_file = os.path.abspath(os.path.expandvars(os.path.expanduser(args.config_file))) state_file = os.path.abspath(os.path.expandvars(os.path.expanduser(args.config_file)))
state = load_state(state_file) state = load_state(state_file)
if args.debug: if args.verbose:
state['debug'] = args.debug state['verbose'] = args.verbose
client = APIClient() client = APIClient()

View File

@ -1,3 +1,4 @@
import base64
import json import json
import os import os
import socket import socket
@ -13,6 +14,7 @@ import psutil
from pathlib2 import Path from pathlib2 import Path
from clearml import Task, StorageManager from clearml import Task, StorageManager
from clearml.backend_api import Session
# noinspection SpellCheckingInspection # noinspection SpellCheckingInspection
@ -97,7 +99,25 @@ def init_task(param, a_default_ssh_fingerprint):
project_name="DevOps", task_name="Allocate Jupyter Notebook Instance", task_type=Task.TaskTypes.service) project_name="DevOps", task_name="Allocate Jupyter Notebook Instance", task_type=Task.TaskTypes.service)
# Add jupyter server base folder # Add jupyter server base folder
task.connect(param, name=config_section_name) if Session.check_min_api_version('2.13'):
param.pop('user_key', None)
param.pop('user_secret', None)
param.pop('ssh_password', None)
task.connect(param, name=config_section_name)
# noinspection PyProtectedMember
runtime_prop = dict(task._get_runtime_properties())
# remove the user key/secret the moment we have it
param['user_key'] = runtime_prop.pop('_user_key', None)
param['user_secret'] = runtime_prop.pop('_user_secret', None)
# no need to reset, we will need it
param['ssh_password'] = runtime_prop.get('_ssh_password')
# Force removing properties
# noinspection PyProtectedMember
task._edit(runtime=runtime_prop)
task.reload()
else:
task.connect(param, name=config_section_name)
# connect ssh finger print configuration (with fallback if section is missing) # connect ssh finger print configuration (with fallback if section is missing)
old_default_ssh_fingerprint = deepcopy(a_default_ssh_fingerprint) old_default_ssh_fingerprint = deepcopy(a_default_ssh_fingerprint)
try: try:
@ -123,16 +143,17 @@ def setup_os_env(param):
"_API_SECRET_KEY", "_API_SECRET_KEY",
"_API_HOST_VERIFY_CERT", "_API_HOST_VERIFY_CERT",
"_DOCKER_IMAGE", "_DOCKER_IMAGE",
"_DOCKER_BASH_SCRIPT",
) )
# set default docker image, with network configuration # set default docker image, with network configuration
if param.get('default_docker', '').strip(): if param.get('default_docker', '').strip():
os.environ["TRAINS_DOCKER_IMAGE"] = param['default_docker'].strip()
os.environ["CLEARML_DOCKER_IMAGE"] = param['default_docker'].strip() os.environ["CLEARML_DOCKER_IMAGE"] = param['default_docker'].strip()
# setup os environment # setup os environment
env = deepcopy(os.environ) env = deepcopy(os.environ)
for key in os.environ: for key in os.environ:
if (key.startswith("TRAINS") or key.startswith("CLEARML")) and not any(key.endswith(p) for p in preserve): # only set CLEARML_ remove any TRAINS_
if key.startswith("TRAINS") or (key.startswith("CLEARML") and not any(key.endswith(p) for p in preserve)):
env.pop(key, None) env.pop(key, None)
return env return env
@ -188,8 +209,7 @@ def monitor_jupyter_server(fd, local_filename, process, task, jupyter_port, host
# we could not locate the token, try again # we could not locate the token, try again
if not token: if not token:
continue continue
# update the task with the correct links and token
task.set_parameter(name='properties/jupyter_token', value=str(token))
# we ignore the reported port, because jupyter server will get confused # we ignore the reported port, because jupyter server will get confused
# if we have multiple servers running and will point to the wrong port/server # if we have multiple servers running and will point to the wrong port/server
task.set_parameter(name='properties/jupyter_port', value=str(jupyter_port)) task.set_parameter(name='properties/jupyter_port', value=str(jupyter_port))
@ -197,8 +217,20 @@ def monitor_jupyter_server(fd, local_filename, process, task, jupyter_port, host
'https' if "https://" in line else 'http', 'https' if "https://" in line else 'http',
hostnames, jupyter_port, token hostnames, jupyter_port, token
) )
# update the task with the correct links and token
if Session.check_min_api_version("2.13"):
# noinspection PyProtectedMember
runtime_prop = task._get_runtime_properties()
runtime_prop['_jupyter_token'] = str(token)
runtime_prop['_jupyter_url'] = str(jupyter_url)
# noinspection PyProtectedMember
task._set_runtime_properties(runtime_prop)
else:
task.set_parameter(name='properties/jupyter_token', value=str(token))
task.set_parameter(name='properties/jupyter_url', value=jupyter_url)
print('\nJupyter Lab URL: {}\n'.format(jupyter_url)) print('\nJupyter Lab URL: {}\n'.format(jupyter_url))
task.set_parameter(name='properties/jupyter_url', value=jupyter_url)
# cleanup # cleanup
# noinspection PyBroadException # noinspection PyBroadException
@ -219,8 +251,8 @@ def start_vscode_server(hostname, hostnames, param, task, env):
# get vscode version and python extension version # get vscode version and python extension version
# they are extremely flaky, this combination works, most do not. # they are extremely flaky, this combination works, most do not.
vscode_version = '3.9.2' vscode_version = '3.12.0'
python_ext_version = '2021.3.658691958' python_ext_version = '2021.10.1365161279'
if param.get("vscode_version"): if param.get("vscode_version"):
vscode_version_parts = param.get("vscode_version").split(':') vscode_version_parts = param.get("vscode_version").split(':')
vscode_version = vscode_version_parts[0] vscode_version = vscode_version_parts[0]
@ -291,7 +323,7 @@ def start_vscode_server(hostname, hostnames, param, task, env):
"--extensions-dir", exts_folder, "--extensions-dir", exts_folder,
"--install-extension", "ms-toolsai.jupyter", "--install-extension", "ms-toolsai.jupyter",
# "--install-extension", "donjayamanne.python-extension-pack" # "--install-extension", "donjayamanne.python-extension-pack"
] + ["--install-extension", python_ext] if python_ext else [], ] + ["--install-extension", "ms-python.python@{}".format(python_ext_version)] if python_ext else [],
env=env, env=env,
stdout=fd, stdout=fd,
stderr=fd, stderr=fd,
@ -343,7 +375,7 @@ def start_jupyter_server(hostname, hostnames, param, task, env):
print('no jupyterlab to monitor - going to sleep') print('no jupyterlab to monitor - going to sleep')
while True: while True:
sleep(10.) sleep(10.)
return return # noqa
# execute jupyter notebook # execute jupyter notebook
fd, local_filename = mkstemp() fd, local_filename = mkstemp()
@ -418,15 +450,15 @@ def setup_ssh_server(hostname, hostnames, param, task):
"&& " # noqa: W605 "&& " # noqa: W605
"echo 'ClientAliveInterval 10' >> /etc/ssh/sshd_config && " "echo 'ClientAliveInterval 10' >> /etc/ssh/sshd_config && "
"echo 'ClientAliveCountMax 20' >> /etc/ssh/sshd_config && " "echo 'ClientAliveCountMax 20' >> /etc/ssh/sshd_config && "
"echo 'AcceptEnv TRAINS_API_ACCESS_KEY TRAINS_API_SECRET_KEY " "echo 'AcceptEnv CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY "
"CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY' >> /etc/ssh/sshd_config && " "CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY' >> /etc/ssh/sshd_config && "
'echo "export VISIBLE=now" >> /etc/profile && ' 'echo "export VISIBLE=now" >> /etc/profile && '
'echo "export PATH=$PATH" >> /etc/profile && ' 'echo "export PATH=$PATH" >> /etc/profile && '
'echo "ldconfig" >> /etc/profile && ' 'echo "ldconfig" >> /etc/profile && '
'echo "export TRAINS_CONFIG_FILE={trains_config_file}" >> /etc/profile'.format( 'echo "export CLEARML_CONFIG_FILE={trains_config_file}" >> /etc/profile'.format(
password=ssh_password, password=ssh_password,
port=port, port=port,
trains_config_file=os.environ.get("CLEARML_CONFIG_FILE") or os.environ.get("TRAINS_CONFIG_FILE"), trains_config_file=os.environ.get("CLEARML_CONFIG_FILE") or os.environ.get("CLEARML_CONFIG_FILE"),
) )
) )
sshd_path = '/usr/sbin/sshd' sshd_path = '/usr/sbin/sshd'
@ -449,7 +481,7 @@ def setup_ssh_server(hostname, hostnames, param, task):
"UsePAM yes" + "\n"\ "UsePAM yes" + "\n"\
"AuthorizedKeysFile {}".format(os.path.join(ssh_config_path, 'authorized_keys')) + "\n"\ "AuthorizedKeysFile {}".format(os.path.join(ssh_config_path, 'authorized_keys')) + "\n"\
"PidFile {}".format(os.path.join(ssh_config_path, 'sshd.pid')) + "\n"\ "PidFile {}".format(os.path.join(ssh_config_path, 'sshd.pid')) + "\n"\
"AcceptEnv TRAINS_API_ACCESS_KEY TRAINS_API_SECRET_KEY "\ "AcceptEnv CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY "\
"CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY"+"\n" "CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY"+"\n"
for k in default_ssh_fingerprint: for k in default_ssh_fingerprint:
filename = os.path.join(ssh_config_path, '{}'.format(k.replace('__pub', '.pub'))) filename = os.path.join(ssh_config_path, '{}'.format(k.replace('__pub', '.pub')))
@ -511,13 +543,23 @@ def setup_ssh_server(hostname, hostnames, param, task):
print("Error: {}\n\n#\n# Error: SSH server could not be launched\n#\n".format(ex)) print("Error: {}\n\n#\n# Error: SSH server could not be launched\n#\n".format(ex))
def _b64_decode_file(encoded_string):
# noinspection PyBroadException
try:
import gzip
value = gzip.decompress(base64.decodebytes(encoded_string.encode('ascii'))).decode('utf8')
return value
except Exception:
return None
def setup_user_env(param, task): def setup_user_env(param, task):
env = setup_os_env(param) env = setup_os_env(param)
# do not change user bash/profile # do not change user bash/profile
if os.geteuid() != 0: if os.geteuid() != 0:
if param.get("user_key") and param.get("user_secret"): if param.get("user_key") and param.get("user_secret"):
env['TRAINS_API_ACCESS_KEY'] = param.get("user_key") env['CLEARML_API_ACCESS_KEY'] = param.get("user_key")
env['TRAINS_API_SECRET_KEY'] = param.get("user_secret") env['CLEARML_API_SECRET_KEY'] = param.get("user_secret")
return env return env
# create symbolic link to the venv # create symbolic link to the venv
@ -530,20 +572,20 @@ def setup_user_env(param, task):
pass pass
# set default user credentials # set default user credentials
if param.get("user_key") and param.get("user_secret"): if param.get("user_key") and param.get("user_secret"):
os.system("echo 'export TRAINS_API_ACCESS_KEY=\"{}\"' >> ~/.bashrc".format( os.system("echo 'export CLEARML_API_ACCESS_KEY=\"{}\"' >> ~/.bashrc".format(
param.get("user_key", "").replace('$', '\\$'))) param.get("user_key", "").replace('$', '\\$')))
os.system("echo 'export TRAINS_API_SECRET_KEY=\"{}\"' >> ~/.bashrc".format( os.system("echo 'export CLEARML_API_SECRET_KEY=\"{}\"' >> ~/.bashrc".format(
param.get("user_secret", "").replace('$', '\\$'))) param.get("user_secret", "").replace('$', '\\$')))
os.system("echo 'export TRAINS_DOCKER_IMAGE=\"{}\"' >> ~/.bashrc".format( os.system("echo 'export CLEARML_DOCKER_IMAGE=\"{}\"' >> ~/.bashrc".format(
param.get("default_docker", "").strip() or env.get('TRAINS_DOCKER_IMAGE', ''))) param.get("default_docker", "").strip() or env.get('CLEARML_DOCKER_IMAGE', '')))
os.system("echo 'export TRAINS_API_ACCESS_KEY=\"{}\"' >> ~/.profile".format( os.system("echo 'export CLEARML_API_ACCESS_KEY=\"{}\"' >> ~/.profile".format(
param.get("user_key", "").replace('$', '\\$'))) param.get("user_key", "").replace('$', '\\$')))
os.system("echo 'export TRAINS_API_SECRET_KEY=\"{}\"' >> ~/.profile".format( os.system("echo 'export CLEARML_API_SECRET_KEY=\"{}\"' >> ~/.profile".format(
param.get("user_secret", "").replace('$', '\\$'))) param.get("user_secret", "").replace('$', '\\$')))
os.system("echo 'export TRAINS_DOCKER_IMAGE=\"{}\"' >> ~/.profile".format( os.system("echo 'export CLEARML_DOCKER_IMAGE=\"{}\"' >> ~/.profile".format(
param.get("default_docker", "").strip() or env.get('TRAINS_DOCKER_IMAGE', ''))) param.get("default_docker", "").strip() or env.get('CLEARML_DOCKER_IMAGE', '')))
env['TRAINS_API_ACCESS_KEY'] = param.get("user_key") env['CLEARML_API_ACCESS_KEY'] = param.get("user_key")
env['TRAINS_API_SECRET_KEY'] = param.get("user_secret") env['CLEARML_API_SECRET_KEY'] = param.get("user_secret")
# set default folder for user # set default folder for user
if param.get("user_base_directory"): if param.get("user_base_directory"):
base_dir = param.get("user_base_directory") base_dir = param.get("user_base_directory")
@ -557,8 +599,27 @@ def setup_user_env(param, task):
os.system("echo '. {}' >> ~/.profile".format(os.path.join(environment, 'bin', 'activate'))) os.system("echo '. {}' >> ~/.profile".format(os.path.join(environment, 'bin', 'activate')))
# check if we need to create .git-credentials # check if we need to create .git-credentials
# noinspection PyProtectedMember
git_credentials = task._get_configuration_text('git_credentials') runtime_property_support = Session.check_min_api_version("2.13")
if runtime_property_support:
# noinspection PyProtectedMember
runtime_prop = dict(task._get_runtime_properties())
git_credentials = runtime_prop.pop('_git_credentials', None)
git_config = runtime_prop.pop('_git_config', None)
# force removing properties
# noinspection PyProtectedMember
task._edit(runtime=runtime_prop)
task.reload()
if git_credentials is not None:
git_credentials = _b64_decode_file(git_credentials)
if git_config is not None:
git_config = _b64_decode_file(git_config)
else:
# noinspection PyProtectedMember
git_credentials = task._get_configuration_text('git_credentials')
# noinspection PyProtectedMember
git_config = task._get_configuration_text('git_config')
if git_credentials: if git_credentials:
git_cred_file = os.path.expanduser('~/.config/git/credentials') git_cred_file = os.path.expanduser('~/.config/git/credentials')
# noinspection PyBroadException # noinspection PyBroadException
@ -568,8 +629,7 @@ def setup_user_env(param, task):
f.write(git_credentials) f.write(git_credentials)
except Exception: except Exception:
print('Could not write {} file'.format(git_cred_file)) print('Could not write {} file'.format(git_cred_file))
# noinspection PyProtectedMember
git_config = task._get_configuration_text('git_config')
if git_config: if git_config:
git_config_file = os.path.expanduser('~/.config/git/config') git_config_file = os.path.expanduser('~/.config/git/config')
# noinspection PyBroadException # noinspection PyBroadException