From 9c14f627b1b0418f2cd8f0d9ff4be3118451225c Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Thu, 18 Nov 2021 10:43:46 +0200 Subject: [PATCH 01/17] Change --debug to --verbose and added remote session setup progress print by default Update default vscode_version to 3.12.0 and python_ext_version to 2021.10.1365161279 Add message status reporting, fix k8s support Add support for Task runtime properties --- clearml_session/__main__.py | 244 +++++++++++++++----- clearml_session/interactive_session_task.py | 120 +++++++--- 2 files changed, 271 insertions(+), 93 deletions(-) diff --git a/clearml_session/__main__.py b/clearml_session/__main__.py index 804cfd9..87990bf 100644 --- a/clearml_session/__main__.py +++ b/clearml_session/__main__.py @@ -1,3 +1,4 @@ +import base64 import hashlib import json import logging @@ -20,6 +21,7 @@ import psutil from clearml import Task from clearml.backend_api.session.client import APIClient from clearml.config import config_obj +from clearml.backend_api import Session from .tcp_proxy import TcpProxy from .single_thread_proxy import SingleThreadProxy @@ -136,35 +138,59 @@ def create_base_task(state, project_name=None, task_name=None): task = Task.create(project_name=project_name or 'DevOps', task_name=task_name or 'Interactive Session', task_type=Task.TaskTypes.application) - task_state = task.export_task() + task_script = task.data.script.to_dict() base_script_file = os.path.abspath(os.path.join(__file__, '..', 'tcp_proxy.py')) with open(base_script_file, 'rt') as f: - task_state['script']['diff'] = f.read() + task_script['diff'] = f.read() base_script_file = os.path.abspath(os.path.join(__file__, '..', 'interactive_session_task.py')) with open(base_script_file, 'rt') as f: - task_state['script']['diff'] += '\n\n' + f.read() + task_script['diff'] += '\n\n' + f.read() - task_state['script']['working_dir'] = '.' 
- task_state['script']['entry_point'] = 'interactive_session.py' - task_state['script']['requirements'] = {'pip': '\n'.join( + task_script['working_dir'] = '.' + task_script['entry_point'] = 'interactive_session.py' + task_script['requirements'] = {'pip': '\n'.join( ["clearml"] + (["jupyter", "jupyterlab", "jupyterlab_git"] if state.get('jupyter_lab') else []) + (['pylint'] if state.get('vscode_server') else []))} - task.update_task(task_state) + section, _, _ = _get_config_section_name() - task.set_parameters({ - "{}/user_base_directory".format(section): "~/", - "{}/ssh_server".format(section): True, - "{}/ssh_password".format(section): "training", - "{}/default_docker".format(section): "nvidia/cuda", - "{}/user_key".format(section): '', - "{}/user_secret".format(section): '', - "properties/external_address": '', - "properties/internal_ssh_port": '', - "properties/jupyter_token": '', - "properties/jupyter_port": '', - }) + + if Session.check_min_api_version('2.13'): + _runtime_prop = dict(task._get_runtime_properties()) + _runtime_prop.update({ + "_user_key": '', + "_user_secret": '', + "_jupyter_token": '', + "_ssh_password": "training", + }) + # noinspection PyProtectedMember + task._set_runtime_properties(_runtime_prop) + task.set_parameters({ + "{}/user_base_directory".format(section): "~/", + "{}/ssh_server".format(section): True, + "{}/default_docker".format(section): "nvidia/cuda", + "properties/external_address": '', + "properties/internal_ssh_port": '', + "properties/jupyter_port": '', + }) + else: + task.set_parameters({ + "{}/user_base_directory".format(section): "~/", + "{}/ssh_server".format(section): True, + "{}/ssh_password".format(section): "training", + "{}/default_docker".format(section): "nvidia/cuda", + "{}/user_key".format(section): '', + "{}/user_secret".format(section): '', + "properties/external_address": '', + "properties/internal_ssh_port": '', + "properties/jupyter_token": '', + "properties/jupyter_port": '', + }) + 
task.set_system_tags([system_tag]) - task.reset(force=True) + + # only update the data at the end, so reload requests are smaller + # noinspection PyProtectedMember + task._edit(script=task_script) return task @@ -197,18 +223,38 @@ def create_debugging_task(state, debug_task_id): (['pylint'] if state.get('vscode_server') else []) task.update_task(task_state) section, _, _ = _get_config_section_name() - task.set_parameters({ - "{}/user_base_directory".format(section): "~/", - "{}/ssh_server".format(section): True, - "{}/ssh_password".format(section): "training", - "{}/default_docker".format(section): "nvidia/cuda", - "{}/user_key".format(section): '', - "{}/user_secret".format(section): '', - "properties/external_address": '', - "properties/internal_ssh_port": '', - "properties/jupyter_token": '', - "properties/jupyter_port": '', - }) + + if Session.check_min_api_version('2.13'): + _runtime_prop = dict(task._get_runtime_properties()) + _runtime_prop.update({ + "_user_key": '', + "_user_secret": '', + "_jupyter_token": '', + "_ssh_password": "training", + }) + # noinspection PyProtectedMember + task._set_runtime_properties(_runtime_prop) + task.set_parameters({ + "{}/user_base_directory".format(section): "~/", + "{}/ssh_server".format(section): True, + "{}/default_docker".format(section): "nvidia/cuda", + "properties/external_address": '', + "properties/internal_ssh_port": '', + "properties/jupyter_port": '', + }) + else: + task.set_parameters({ + "{}/user_base_directory".format(section): "~/", + "{}/ssh_server".format(section): True, + "{}/ssh_password".format(section): "training", + "{}/default_docker".format(section): "nvidia/cuda", + "{}/user_key".format(section): '', + "{}/user_secret".format(section): '', + "properties/external_address": '', + "properties/internal_ssh_port": '', + "properties/jupyter_token": '', + "properties/jupyter_port": '', + }) task.set_system_tags([system_tag]) task.reset(force=True) return task @@ -265,6 +311,17 @@ def 
_get_user_id(client): return current_user_id +def _b64_encode_file(file): + # noinspection PyBroadException + try: + import gzip + with open(file, 'rt') as f: + git_credentials = gzip.compress(f.read().encode('utf8')) + return base64.encodebytes(git_credentials).decode('ascii') + except Exception: + return None + + def get_project_id(state): project_id = None project_name = state.get('project') or None @@ -383,8 +440,8 @@ def load_state(state_file): state = json.load(f) except Exception: state = {} - # never reload --debug state - state.pop('debug', None) + # never reload --verbose state + state.pop('verbose', None) return state @@ -402,13 +459,31 @@ def clone_task(state, project_id): task = create_base_task(state, project_name=state.get('project')) new_task = True + print('Configuring new session') + runtime_prop_support = Session.check_min_api_version("2.13") + if runtime_prop_support: + # noinspection PyProtectedMember + runtime_properties = dict(task._get_runtime_properties() or {}) + runtime_properties['_jupyter_token'] = '' + runtime_properties['_ssh_password'] = str(state['password']) + runtime_properties['_user_key'] = str(config_obj.get("api.credentials.access_key")) + runtime_properties['_user_secret'] = (config_obj.get("api.credentials.secret_key")) + # noinspection PyProtectedMember + task._set_runtime_properties(runtime_properties) + task_params = task.get_parameters(backwards_compatibility=False) if 'General/ssh_server' in task_params: section = 'General' init_section = 'init_script' else: section, _, init_section = _get_config_section_name() - task_params['properties/jupyter_token'] = '' + + if not runtime_prop_support: + task_params['properties/jupyter_token'] = '' + task_params['{}/ssh_password'.format(section)] = state['password'] + task_params['{}/user_key'.format(section)] = config_obj.get("api.credentials.access_key") + task_params['{}/user_secret'.format(section)] = config_obj.get("api.credentials.secret_key") + 
task_params['properties/jupyter_port'] = '' if state.get('remote_gateway') is not None: remote_gateway_parts = str(state.get('remote_gateway')).split(':') @@ -416,9 +491,6 @@ def clone_task(state, project_id): if len(remote_gateway_parts) > 1: task_params['properties/external_ssh_port'] = remote_gateway_parts[1] task_params['{}/ssh_server'.format(section)] = str(True) - task_params['{}/ssh_password'.format(section)] = state['password'] - task_params['{}/user_key'.format(section)] = config_obj.get("api.credentials.access_key") - task_params['{}/user_secret'.format(section)] = config_obj.get("api.credentials.secret_key") task_params["{}/jupyterlab".format(section)] = bool(state.get('jupyter_lab')) task_params["{}/vscode_server".format(section)] = bool(state.get('vscode_server')) task_params["{}/public_ip".format(section)] = bool(state.get('public_ip')) @@ -443,13 +515,30 @@ def clone_task(state, project_id): # store the .git-credentials if state.get('git_credentials'): git_cred_file = os.path.join(os.path.expanduser('~'), '.git-credentials') - if os.path.isfile(git_cred_file): - task.connect_configuration( - configuration=git_cred_file, name='git_credentials', description='git credentials') git_conf_file = os.path.join(os.path.expanduser('~'), '.gitconfig') - if os.path.isfile(git_conf_file): - task.connect_configuration( - configuration=git_conf_file, name='git_config', description='git config') + if not os.path.isfile(git_cred_file): + git_cred_file = None + if not os.path.isfile(git_conf_file): + git_conf_file = None + + if runtime_prop_support: + # noinspection PyProtectedMember + runtime_properties = dict(task._get_runtime_properties() or {}) + if git_cred_file: + runtime_properties['_git_credentials'] = _b64_encode_file(git_cred_file) + if git_conf_file: + runtime_properties['_git_config'] = _b64_encode_file(git_conf_file) + # store back + if git_cred_file or git_conf_file: + # noinspection PyProtectedMember + task._set_runtime_properties(runtime_properties) + 
else: + if git_cred_file: + task.connect_configuration( + configuration=git_cred_file, name='git_credentials', description='git credentials') + if git_conf_file: + task.connect_configuration( + configuration=git_conf_file, name='git_config', description='git config') if state.get('packages'): requirements = task.data.script.requirements or {} @@ -476,21 +565,28 @@ def wait_for_machine(state, task): # wait until task is running print('Waiting for remote machine allocation [id={}]'.format(task.id)) last_status = None - while last_status != 'in_progress' and last_status in (None, 'created', 'queued', 'unknown',): + last_message = None + stopped_counter = 0 + while last_status != 'in_progress' and last_status in (None, 'created', 'queued', 'unknown', 'stopped'): print('.', end='', flush=True) if last_status is not None: sleep(2.) - status = task.get_status() - if last_status != status: + stopped_counter = (stopped_counter+1) if last_status == 'stopped' else 0 + if stopped_counter > 5: + break + # noinspection PyProtectedMember + status, message = task._get_status() + status = str(status) + if last_status != status or last_message != message: # noinspection PyProtectedMember - last_status = task._get_status()[1] - print('Status [{}]{}'.format(status, ' - {}'.format(last_status) if last_status else '')) + print('Status [{}]{} {}'.format(status, ' - {}'.format(last_status) if last_status else '', message)) last_status = status + last_message = message print('Remote machine allocated') print('Setting remote environment [Task id={}]'.format(task.id)) print('Setup process details: {}'.format(task.get_output_log_web_page())) - print('Waiting for environment setup to complete [usually about 20-30 seconds]') + print('Waiting for environment setup to complete [usually about 20-30 seconds, see last log line/s below]') # monitor progress, until we get the new jupyter, then we know it is working task.reload() @@ -513,24 +609,38 @@ def wait_for_machine(state, task): last_lines = [] 
period_counter = 0 while any(bool(not task.get_parameter(p)) for p in wait_properties) and task.get_status() == 'in_progress': - lines = task.get_reported_console_output(10) if state.get('debug') else [] + lines = task.get_reported_console_output(10 if state.get('verbose') else 1) if last_lines != lines: # new line if we had '.' counter in the previous run if period_counter: - print('') + if state.get('verbose'): + print('') period_counter = 0 try: index = next(i for i, line in enumerate(lines) if last_lines and line == last_lines[-1]) - print('> ' + ''.join(lines[index+1:]).rstrip().replace('\n', '\n> ')) + print_line = '> ' + ''.join(lines[index+1:]).rstrip().replace('\n', '\n> ') except StopIteration: - print('> ' + ''.join(lines).rstrip().replace('\n', '\n> ')) + print_line = '> ' + ''.join(lines).rstrip().replace('\n', '\n> ') + + if state.get('verbose'): + print(print_line) + else: + print_line = [l for l in print_line.split('\n') if l.rstrip()] + if print_line: + print('\r' + print_line[-1], end='', flush=True) last_lines = lines else: - print('.', end='', flush=True) period_counter += 1 + print(('' if state.get('verbose') else '\r') + '.'*period_counter, end='', flush=True) sleep(3.) 
task.reload() + + # clear the line + if not state.get('verbose'): + print('\r ', end='', flush=True) + print('\n') + if task.get_status() != 'in_progress': raise ValueError("Remote setup failed (status={}) see details: {}".format( task.get_status(), task.get_output_log_web_page())) @@ -628,14 +738,21 @@ def monitor_ssh_tunnel(state, task): if not all([ssh_port, jupyter_token, jupyter_port, internal_ssh_port, ssh_password, remote_address]): task.reload() task_parameters = task.get_parameters() - section = 'General' if 'General/ssh_server' in task_parameters else default_section + if Session.check_min_api_version("2.13"): + # noinspection PyProtectedMember + runtime_prop = task._get_runtime_properties() + ssh_password = runtime_prop.get('_ssh_password') or state.get('password', '') + jupyter_token = runtime_prop.get('_jupyter_token') + else: + section = 'General' if 'General/ssh_server' in task_parameters else default_section + ssh_password = task_parameters.get('{}/ssh_password'.format(section)) or state.get('password', '') + jupyter_token = task_parameters.get('properties/jupyter_token') + remote_address = \ task_parameters.get('properties/k8s-gateway-address') or \ task_parameters.get('properties/external_address') - ssh_password = task_parameters.get('{}/ssh_password'.format(section)) or state.get('password', '') internal_ssh_port = task_parameters.get('properties/internal_ssh_port') jupyter_port = task_parameters.get('properties/jupyter_port') - jupyter_token = task_parameters.get('properties/jupyter_token') ssh_port = \ task_parameters.get('properties/k8s-pod-port') or \ task_parameters.get('properties/external_ssh_port') or internal_ssh_port @@ -671,7 +788,7 @@ def monitor_ssh_tunnel(state, task): state.get('username') or 'root', remote_address, ssh_port, ssh_password, local_remote_pair_list=local_remote_pair_list, - debug=state.get('debug', False), + debug=state.get('verbose', False), ) if ssh_process and ssh_process.isalive(): @@ -826,8 +943,9 @@ def 
setup_parser(parser): parser.add_argument('--username', type=str, default=None, help='Advanced: Select ssh username for the interactive session ' '(default: `root` or previously used one)') - parser.add_argument('--debug', action='store_true', default=None, - help='Advanced: If set, print debugging information') + parser.add_argument('--verbose', action='store_true', default=None, + help='Advanced: If set, print verbose progress information, ' + 'e.g. the remote machine setup process log') def get_version(): @@ -862,8 +980,8 @@ def cli(): state_file = os.path.abspath(os.path.expandvars(os.path.expanduser(args.config_file))) state = load_state(state_file) - if args.debug: - state['debug'] = args.debug + if args.verbose: + state['verbose'] = args.verbose client = APIClient() diff --git a/clearml_session/interactive_session_task.py b/clearml_session/interactive_session_task.py index dbfbc1d..5b581d0 100644 --- a/clearml_session/interactive_session_task.py +++ b/clearml_session/interactive_session_task.py @@ -1,3 +1,4 @@ +import base64 import json import os import socket @@ -13,6 +14,7 @@ import psutil from pathlib2 import Path from clearml import Task, StorageManager +from clearml.backend_api import Session # noinspection SpellCheckingInspection @@ -97,7 +99,25 @@ def init_task(param, a_default_ssh_fingerprint): project_name="DevOps", task_name="Allocate Jupyter Notebook Instance", task_type=Task.TaskTypes.service) # Add jupyter server base folder - task.connect(param, name=config_section_name) + if Session.check_min_api_version('2.13'): + param.pop('user_key', None) + param.pop('user_secret', None) + param.pop('ssh_password', None) + task.connect(param, name=config_section_name) + # noinspection PyProtectedMember + runtime_prop = dict(task._get_runtime_properties()) + # remove the user key/secret the moment we have it + param['user_key'] = runtime_prop.pop('_user_key', None) + param['user_secret'] = runtime_prop.pop('_user_secret', None) + # no need to reset, we will 
need it + param['ssh_password'] = runtime_prop.get('_ssh_password') + # Force removing properties + # noinspection PyProtectedMember + task._edit(runtime=runtime_prop) + task.reload() + else: + task.connect(param, name=config_section_name) + # connect ssh finger print configuration (with fallback if section is missing) old_default_ssh_fingerprint = deepcopy(a_default_ssh_fingerprint) try: @@ -123,16 +143,17 @@ def setup_os_env(param): "_API_SECRET_KEY", "_API_HOST_VERIFY_CERT", "_DOCKER_IMAGE", + "_DOCKER_BASH_SCRIPT", ) # set default docker image, with network configuration if param.get('default_docker', '').strip(): - os.environ["TRAINS_DOCKER_IMAGE"] = param['default_docker'].strip() os.environ["CLEARML_DOCKER_IMAGE"] = param['default_docker'].strip() # setup os environment env = deepcopy(os.environ) for key in os.environ: - if (key.startswith("TRAINS") or key.startswith("CLEARML")) and not any(key.endswith(p) for p in preserve): + # only set CLEARML_ remove any TRAINS_ + if key.startswith("TRAINS") or (key.startswith("CLEARML") and not any(key.endswith(p) for p in preserve)): env.pop(key, None) return env @@ -188,8 +209,7 @@ def monitor_jupyter_server(fd, local_filename, process, task, jupyter_port, host # we could not locate the token, try again if not token: continue - # update the task with the correct links and token - task.set_parameter(name='properties/jupyter_token', value=str(token)) + # we ignore the reported port, because jupyter server will get confused # if we have multiple servers running and will point to the wrong port/server task.set_parameter(name='properties/jupyter_port', value=str(jupyter_port)) @@ -197,8 +217,20 @@ def monitor_jupyter_server(fd, local_filename, process, task, jupyter_port, host 'https' if "https://" in line else 'http', hostnames, jupyter_port, token ) + + # update the task with the correct links and token + if Session.check_min_api_version("2.13"): + # noinspection PyProtectedMember + runtime_prop = 
task._get_runtime_properties() + runtime_prop['_jupyter_token'] = str(token) + runtime_prop['_jupyter_url'] = str(jupyter_url) + # noinspection PyProtectedMember + task._set_runtime_properties(runtime_prop) + else: + task.set_parameter(name='properties/jupyter_token', value=str(token)) + task.set_parameter(name='properties/jupyter_url', value=jupyter_url) + print('\nJupyter Lab URL: {}\n'.format(jupyter_url)) - task.set_parameter(name='properties/jupyter_url', value=jupyter_url) # cleanup # noinspection PyBroadException @@ -219,8 +251,8 @@ def start_vscode_server(hostname, hostnames, param, task, env): # get vscode version and python extension version # they are extremely flaky, this combination works, most do not. - vscode_version = '3.9.2' - python_ext_version = '2021.3.658691958' + vscode_version = '3.12.0' + python_ext_version = '2021.10.1365161279' if param.get("vscode_version"): vscode_version_parts = param.get("vscode_version").split(':') vscode_version = vscode_version_parts[0] @@ -291,7 +323,7 @@ def start_vscode_server(hostname, hostnames, param, task, env): "--extensions-dir", exts_folder, "--install-extension", "ms-toolsai.jupyter", # "--install-extension", "donjayamanne.python-extension-pack" - ] + ["--install-extension", python_ext] if python_ext else [], + ] + ["--install-extension", "ms-python.python@{}".format(python_ext_version)] if python_ext else [], env=env, stdout=fd, stderr=fd, @@ -343,7 +375,7 @@ def start_jupyter_server(hostname, hostnames, param, task, env): print('no jupyterlab to monitor - going to sleep') while True: sleep(10.) 
- return + return # noqa # execute jupyter notebook fd, local_filename = mkstemp() @@ -418,15 +450,15 @@ def setup_ssh_server(hostname, hostnames, param, task): "&& " # noqa: W605 "echo 'ClientAliveInterval 10' >> /etc/ssh/sshd_config && " "echo 'ClientAliveCountMax 20' >> /etc/ssh/sshd_config && " - "echo 'AcceptEnv TRAINS_API_ACCESS_KEY TRAINS_API_SECRET_KEY " + "echo 'AcceptEnv CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY " "CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY' >> /etc/ssh/sshd_config && " 'echo "export VISIBLE=now" >> /etc/profile && ' 'echo "export PATH=$PATH" >> /etc/profile && ' 'echo "ldconfig" >> /etc/profile && ' - 'echo "export TRAINS_CONFIG_FILE={trains_config_file}" >> /etc/profile'.format( + 'echo "export CLEARML_CONFIG_FILE={trains_config_file}" >> /etc/profile'.format( password=ssh_password, port=port, - trains_config_file=os.environ.get("CLEARML_CONFIG_FILE") or os.environ.get("TRAINS_CONFIG_FILE"), + trains_config_file=os.environ.get("CLEARML_CONFIG_FILE") or os.environ.get("CLEARML_CONFIG_FILE"), ) ) sshd_path = '/usr/sbin/sshd' @@ -449,7 +481,7 @@ def setup_ssh_server(hostname, hostnames, param, task): "UsePAM yes" + "\n"\ "AuthorizedKeysFile {}".format(os.path.join(ssh_config_path, 'authorized_keys')) + "\n"\ "PidFile {}".format(os.path.join(ssh_config_path, 'sshd.pid')) + "\n"\ - "AcceptEnv TRAINS_API_ACCESS_KEY TRAINS_API_SECRET_KEY "\ + "AcceptEnv CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY "\ "CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY"+"\n" for k in default_ssh_fingerprint: filename = os.path.join(ssh_config_path, '{}'.format(k.replace('__pub', '.pub'))) @@ -511,13 +543,23 @@ def setup_ssh_server(hostname, hostnames, param, task): print("Error: {}\n\n#\n# Error: SSH server could not be launched\n#\n".format(ex)) +def _b64_decode_file(encoded_string): + # noinspection PyBroadException + try: + import gzip + value = gzip.decompress(base64.decodebytes(encoded_string.encode('ascii'))).decode('utf8') + return value + except 
Exception: + return None + + def setup_user_env(param, task): env = setup_os_env(param) # do not change user bash/profile if os.geteuid() != 0: if param.get("user_key") and param.get("user_secret"): - env['TRAINS_API_ACCESS_KEY'] = param.get("user_key") - env['TRAINS_API_SECRET_KEY'] = param.get("user_secret") + env['CLEARML_API_ACCESS_KEY'] = param.get("user_key") + env['CLEARML_API_SECRET_KEY'] = param.get("user_secret") return env # create symbolic link to the venv @@ -530,20 +572,20 @@ def setup_user_env(param, task): pass # set default user credentials if param.get("user_key") and param.get("user_secret"): - os.system("echo 'export TRAINS_API_ACCESS_KEY=\"{}\"' >> ~/.bashrc".format( + os.system("echo 'export CLEARML_API_ACCESS_KEY=\"{}\"' >> ~/.bashrc".format( param.get("user_key", "").replace('$', '\\$'))) - os.system("echo 'export TRAINS_API_SECRET_KEY=\"{}\"' >> ~/.bashrc".format( + os.system("echo 'export CLEARML_API_SECRET_KEY=\"{}\"' >> ~/.bashrc".format( param.get("user_secret", "").replace('$', '\\$'))) - os.system("echo 'export TRAINS_DOCKER_IMAGE=\"{}\"' >> ~/.bashrc".format( - param.get("default_docker", "").strip() or env.get('TRAINS_DOCKER_IMAGE', ''))) - os.system("echo 'export TRAINS_API_ACCESS_KEY=\"{}\"' >> ~/.profile".format( + os.system("echo 'export CLEARML_DOCKER_IMAGE=\"{}\"' >> ~/.bashrc".format( + param.get("default_docker", "").strip() or env.get('CLEARML_DOCKER_IMAGE', ''))) + os.system("echo 'export CLEARML_API_ACCESS_KEY=\"{}\"' >> ~/.profile".format( param.get("user_key", "").replace('$', '\\$'))) - os.system("echo 'export TRAINS_API_SECRET_KEY=\"{}\"' >> ~/.profile".format( + os.system("echo 'export CLEARML_API_SECRET_KEY=\"{}\"' >> ~/.profile".format( param.get("user_secret", "").replace('$', '\\$'))) - os.system("echo 'export TRAINS_DOCKER_IMAGE=\"{}\"' >> ~/.profile".format( - param.get("default_docker", "").strip() or env.get('TRAINS_DOCKER_IMAGE', ''))) - env['TRAINS_API_ACCESS_KEY'] = param.get("user_key") - 
env['TRAINS_API_SECRET_KEY'] = param.get("user_secret") + os.system("echo 'export CLEARML_DOCKER_IMAGE=\"{}\"' >> ~/.profile".format( + param.get("default_docker", "").strip() or env.get('CLEARML_DOCKER_IMAGE', ''))) + env['CLEARML_API_ACCESS_KEY'] = param.get("user_key") + env['CLEARML_API_SECRET_KEY'] = param.get("user_secret") # set default folder for user if param.get("user_base_directory"): base_dir = param.get("user_base_directory") @@ -557,8 +599,27 @@ def setup_user_env(param, task): os.system("echo '. {}' >> ~/.profile".format(os.path.join(environment, 'bin', 'activate'))) # check if we need to create .git-credentials - # noinspection PyProtectedMember - git_credentials = task._get_configuration_text('git_credentials') + + runtime_property_support = Session.check_min_api_version("2.13") + if runtime_property_support: + # noinspection PyProtectedMember + runtime_prop = dict(task._get_runtime_properties()) + git_credentials = runtime_prop.pop('_git_credentials', None) + git_config = runtime_prop.pop('_git_config', None) + # force removing properties + # noinspection PyProtectedMember + task._edit(runtime=runtime_prop) + task.reload() + if git_credentials is not None: + git_credentials = _b64_decode_file(git_credentials) + if git_config is not None: + git_config = _b64_decode_file(git_config) + else: + # noinspection PyProtectedMember + git_credentials = task._get_configuration_text('git_credentials') + # noinspection PyProtectedMember + git_config = task._get_configuration_text('git_config') + if git_credentials: git_cred_file = os.path.expanduser('~/.config/git/credentials') # noinspection PyBroadException @@ -568,8 +629,7 @@ def setup_user_env(param, task): f.write(git_credentials) except Exception: print('Could not write {} file'.format(git_cred_file)) - # noinspection PyProtectedMember - git_config = task._get_configuration_text('git_config') + if git_config: git_config_file = os.path.expanduser('~/.config/git/config') # noinspection PyBroadException 
From 0d2c3ba4a5fc8233531849ad4dd7406e394dea73 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Thu, 18 Nov 2021 10:44:04 +0200 Subject: [PATCH 02/17] Update minimum clearml version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 2030eeb..19bab3c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -clearml +clearml >= 1.1.4 pexpect ; sys_platform != 'win32' wexpect ; sys_platform == 'win32' From d646a0d48df1da1ddd4295b1651919a036011379 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Thu, 18 Nov 2021 10:45:01 +0200 Subject: [PATCH 03/17] version bump --- clearml_session/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml_session/version.py b/clearml_session/version.py index bfeb9e7..45897bf 100644 --- a/clearml_session/version.py +++ b/clearml_session/version.py @@ -1 +1 @@ -__version__ = '0.3.4' +__version__ = '0.3.5rc0' From d0ce13f004920d181a5fda747a1e0f4d6bc062f3 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sun, 12 Dec 2021 23:22:51 +0200 Subject: [PATCH 04/17] Fix README --- README.md | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 2929ab1..0c6ca17 100644 --- a/README.md +++ b/README.md @@ -202,7 +202,7 @@ In the `clearml` web UI, find the experiment (Task) you wish to debug. Click on the ID button next to the Task name, and copy the unique ID. 
``` bash -clearml-session --debugging +clearml-session --debugging-session ``` Click on the JupyterLab/VSCode link, or connect directly to the SSH session @@ -216,9 +216,12 @@ clearml-session --help ``` console clearml-session - CLI for launching JupyterLab / VSCode on a remote machine usage: clearml-session [-h] [--version] [--attach [ATTACH]] - [--debugging DEBUGGING] [--queue QUEUE] - [--docker DOCKER] [--public-ip [true/false]] + [--debugging-session TASK_ID] [--queue QUEUE] + [--docker DOCKER] [--docker-args DOCKER_ARGS] + [--public-ip [true/false]] + [--remote-ssh-port REMOTE_SSH_PORT] [--vscode-server [true/false]] + [--vscode-version VSCODE_VERSION] [--jupyter-lab [true/false]] [--git-credentials [true/false]] [--user-folder USER_FOLDER] @@ -233,6 +236,7 @@ usage: clearml-session [-h] [--version] [--attach [ATTACH]] [--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]] [--skip-docker-network] [--password PASSWORD] [--username USERNAME] + [--verbose] clearml-session - CLI for launching JupyterLab / VSCode on a remote machine @@ -241,23 +245,34 @@ optional arguments: --version Display the clearml-session utility version --attach [ATTACH] Attach to running interactive session (default: previous session) - --debugging DEBUGGING + --debugging-session TASK_ID Pass existing Task id (experiment), create a copy of the experiment on a remote machine, and launch jupyter/ssh for interactive access. Example - --debugging + --debugging-session --queue QUEUE Select the queue to launch the interactive session on (default: previously used queue) --docker DOCKER Select the docker image to use in the interactive session on (default: previously used docker image or `nvidia/cuda:10.1-runtime-ubuntu18.04`) + --docker-args DOCKER_ARGS + Add additional arguments for the docker image to use + in the interactive session on (default: previously + used docker-args) --public-ip [true/false] If True register the public IP of the remote machine. Set if running on the cloud. 
Default: false (use for local / on-premises) + --remote-ssh-port REMOTE_SSH_PORT + Set the remote ssh server port, running on the agent`s + machine. (default: 10022) --vscode-server [true/false] Install vscode server (code-server) on interactive session (default: true) + --vscode-version VSCODE_VERSION + Set vscode server (code-server) version, as well as + vscode python extension version + (example: "3.7.4:2020.10.332292344") --jupyter-lab [true/false] Install Jupyter-Lab on interactive session (default: true) @@ -284,8 +299,9 @@ optional arguments: Advanced: Change the configuration file used to store the previous state (default: ~/.clearml_session.json) --remote-gateway [REMOTE_GATEWAY] - Advanced: Specify gateway ip/address to be passed to - interactive session (for use with k8s ingestion / ELB) + Advanced: Specify gateway ip/address:port to be passed + to interactive session (for use with k8s ingestion / + ELB) --base-task-id BASE_TASK_ID Advanced: Set the base task ID for the interactive session. (default: previously used Task). Use `none` @@ -310,6 +326,9 @@ optional arguments: used one) --username USERNAME Advanced: Select ssh username for the interactive session (default: `root` or previously used one) + --verbose Advanced: If set, print verbose progress information, + e.g. the remote machine setup process log + Notice! 
all arguments are stored as new defaults for the next session ``` From 363764b332e74f1063c2a9e7e50f36f8afa4d425 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sun, 12 Dec 2021 23:23:16 +0200 Subject: [PATCH 05/17] Add verbose reporting when deleting stale sessions --- clearml_session/__main__.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/clearml_session/__main__.py b/clearml_session/__main__.py index 87990bf..6d7cb40 100644 --- a/clearml_session/__main__.py +++ b/clearml_session/__main__.py @@ -260,7 +260,7 @@ def create_debugging_task(state, debug_task_id): return task -def delete_old_tasks(client, base_task_id): +def delete_old_tasks(state, client, base_task_id): print('Removing stale interactive sessions') current_user_id = _get_user_id(client) previous_tasks = client.tasks.get_all(**{ @@ -268,9 +268,13 @@ def delete_old_tasks(client, base_task_id): 'parent': base_task_id or None, 'system_tags': None if base_task_id else [system_tag], 'page_size': 100, 'page': 0, - 'user': [current_user_id], 'only_fields': ['id'] + 'user': [current_user_id], + 'only_fields': ['id'] }) - for t in previous_tasks: + + for i, t in enumerate(previous_tasks): + if state.get('verbose'): + print('Removing {}/{} stale sessions'.format(i+1, len(previous_tasks))) try: client.tasks.delete(task=t.id, force=True) except Exception as ex: @@ -1012,7 +1016,7 @@ def cli(): project_id = get_project_id(state) # remove old Tasks created by us. 
- delete_old_tasks(client, state.get('base_task_id')) + delete_old_tasks(state, client, state.get('base_task_id')) # Clone the Task and adjust parameters task = clone_task(state, project_id) From 9192a6472971c16005c823d14475f7f76039e463 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sun, 12 Dec 2021 23:23:32 +0200 Subject: [PATCH 06/17] Fix support for pre-installed vscode-server in container --- clearml_session/interactive_session_task.py | 131 ++++++++++++-------- 1 file changed, 79 insertions(+), 52 deletions(-) diff --git a/clearml_session/interactive_session_task.py b/clearml_session/interactive_session_task.py index 5b581d0..60b3402 100644 --- a/clearml_session/interactive_session_task.py +++ b/clearml_session/interactive_session_task.py @@ -263,27 +263,40 @@ def start_vscode_server(hostname, hostnames, param, task, env): env = dict(**env) env.pop('PYTHONPATH', None) + pre_installed = False + python_ext = None + # find a free tcp port port = get_free_port(9000, 9100) if os.geteuid() == 0: - # installing VSCODE: + # check if preinstalled + # noinspection PyBroadException try: - python_ext = StorageManager.get_local_copy( - 'https://github.com/microsoft/vscode-python/releases/download/{}/ms-python-release.vsix'.format( - python_ext_version), - extract_archive=False) - code_server_deb = StorageManager.get_local_copy( - 'https://github.com/cdr/code-server/releases/download/' - 'v{version}/code-server_{version}_amd64.deb'.format(version=vscode_version), - extract_archive=False) - os.system("dpkg -i {}".format(code_server_deb)) - except Exception as ex: - print("Failed installing vscode server: {}".format(ex)) - return - vscode_path = 'code-server' + vscode_path = subprocess.check_output('which code-server', shell=True).decode().strip() + pre_installed = bool(vscode_path) + except Exception: + vscode_path = None + + if not vscode_path: + # installing VSCODE: + try: + python_ext = StorageManager.get_local_copy( + 
'https://github.com/microsoft/vscode-python/releases/download/{}/ms-python-release.vsix'.format( + python_ext_version), + extract_archive=False) + code_server_deb = StorageManager.get_local_copy( + 'https://github.com/cdr/code-server/releases/download/' + 'v{version}/code-server_{version}_amd64.deb'.format(version=vscode_version), + extract_archive=False) + os.system("dpkg -i {}".format(code_server_deb)) + except Exception as ex: + print("Failed installing vscode server: {}".format(ex)) + return + vscode_path = 'code-server' else: python_ext = None + pre_installed = True # check if code-server exists # noinspection PyBroadException try: @@ -312,51 +325,65 @@ def start_vscode_server(hostname, hostnames, param, task, env): try: fd, local_filename = mkstemp() - subprocess.Popen( - [ - vscode_path, - "--auth", - "none", - "--bind-addr", - "127.0.0.1:{}".format(port), - "--user-data-dir", user_folder, - "--extensions-dir", exts_folder, - "--install-extension", "ms-toolsai.jupyter", - # "--install-extension", "donjayamanne.python-extension-pack" - ] + ["--install-extension", "ms-python.python@{}".format(python_ext_version)] if python_ext else [], - env=env, - stdout=fd, - stderr=fd, - ) - settings = Path(os.path.expanduser(os.path.join(user_folder, 'User/settings.json'))) - settings.parent.mkdir(parents=True, exist_ok=True) - # noinspection PyBroadException - try: - with open(settings.as_posix(), 'rt') as f: - base_json = json.load(f) - except Exception: - base_json = {} - # noinspection PyBroadException - try: - base_json.update({ - "extensions.autoCheckUpdates": False, - "extensions.autoUpdate": False, - "python.pythonPath": sys.executable, - "terminal.integrated.shell.linux": "/bin/bash" if Path("/bin/bash").is_file() else None, - }) - with open(settings.as_posix(), 'wt') as f: - json.dump(base_json, f) - except Exception: - pass + if pre_installed: + user_folder = os.path.expanduser("~/.local/share/code-server/") + if not os.path.isdir(user_folder): + user_folder = 
None + exts_folder = None + else: + exts_folder = os.path.expanduser("~/.local/share/code-server/extensions/") + else: + subprocess.Popen( + [ + vscode_path, + "--auth", + "none", + "--bind-addr", + "127.0.0.1:{}".format(port), + "--user-data-dir", user_folder, + "--extensions-dir", exts_folder, + "--install-extension", "ms-toolsai.jupyter", + # "--install-extension", "donjayamanne.python-extension-pack" + ] + ["--install-extension", "ms-python.python@{}".format(python_ext_version)] if python_ext else [], + env=env, + stdout=fd, + stderr=fd, + ) + + if user_folder: + settings = Path(os.path.expanduser(os.path.join(user_folder, 'User/settings.json'))) + settings.parent.mkdir(parents=True, exist_ok=True) + # noinspection PyBroadException + try: + with open(settings.as_posix(), 'rt') as f: + base_json = json.load(f) + except Exception: + base_json = {} + # noinspection PyBroadException + try: + base_json.update({ + "extensions.autoCheckUpdates": False, + "extensions.autoUpdate": False, + "python.pythonPath": sys.executable, + "terminal.integrated.shell.linux": "/bin/bash" if Path("/bin/bash").is_file() else None, + }) + with open(settings.as_posix(), 'wt') as f: + json.dump(base_json, f) + except Exception: + pass + proc = subprocess.Popen( ['bash', '-c', - '{} --auth none --bind-addr 127.0.0.1:{} --disable-update-check ' - '--user-data-dir {} --extensions-dir {}'.format(vscode_path, port, user_folder, exts_folder)], + '{} --auth none --bind-addr 127.0.0.1:{} --disable-update-check {} {}'.format( + vscode_path, port, + '--user-data-dir \"{}\"'.format(user_folder) if user_folder else '', + '--extensions-dir \"{}\"'.format(exts_folder) if exts_folder else '')], env=env, stdout=fd, stderr=fd, cwd=cwd, ) + try: error_code = proc.wait(timeout=1) raise ValueError("code-server failed starting, return code {}".format(error_code)) From 9bd389e7f271b9184ba4c31ad981a99ccefcd946 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sun, 12 Dec 2021 23:25:28 +0200 Subject: [PATCH 
07/17] Version bump --- clearml_session/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml_session/version.py b/clearml_session/version.py index 45897bf..58913f0 100644 --- a/clearml_session/version.py +++ b/clearml_session/version.py @@ -1 +1 @@ -__version__ = '0.3.5rc0' +__version__ = '0.3.5rc1' From 6a851986360a89720cba78d8e487b0f8f6437c94 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Mon, 13 Dec 2021 00:58:22 +0200 Subject: [PATCH 08/17] Change default for --keepalive (default is now False) --- clearml_session/__main__.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/clearml_session/__main__.py b/clearml_session/__main__.py index 6d7cb40..e6c3359 100644 --- a/clearml_session/__main__.py +++ b/clearml_session/__main__.py @@ -723,7 +723,7 @@ def monitor_ssh_tunnel(state, task): vscode_port = None connect_state = {'reconnect': False} - if not state.get('disable_keepalive'): + if state.get('keepalive'): if state.get('jupyter_lab'): SingleThreadProxy(local_jupyter_port, local_jupyter_port_) if state.get('vscode_server'): @@ -760,7 +760,7 @@ def monitor_ssh_tunnel(state, task): ssh_port = \ task_parameters.get('properties/k8s-pod-port') or \ task_parameters.get('properties/external_ssh_port') or internal_ssh_port - if not state.get('disable_keepalive'): + if state.get('keepalive'): internal_ssh_port = task_parameters.get('properties/internal_stable_ssh_port') or internal_ssh_port local_remote_pair_list = [(local_ssh_port_, internal_ssh_port)] if state.get('jupyter_lab'): @@ -931,9 +931,10 @@ def setup_parser(parser): '(default: previously used Task). Use `none` for the default interactive session') parser.add_argument('--project', type=str, default=None, help='Advanced: Set the project name for the interactive session Task') - parser.add_argument('--disable-keepalive', action='store_true', default=None, - help='Advanced: If set, disable the transparent proxy always keeping the sockets alive. 
' - 'Default: false, use transparent socket mitigating connection drops.') + parser.add_argument('--keepalive', default=False, nargs='?', const='true', metavar='true/false', + type=lambda x: (str(x).strip().lower() in ('true', 'yes')), + help='Advanced: If set, enables the transparent proxy always keeping the sockets alive. ' + 'Default: False, do not use transparent socket for mitigating connection drops.') parser.add_argument('--queue-excluded-tag', default=None, nargs='*', help='Advanced: Excluded queues with this specific tag from the selection') parser.add_argument('--queue-include-tag', default=None, nargs='*', From 62ed8be256deaacc3ac38b008ce7c9f9730b4dcd Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Mon, 13 Dec 2021 01:56:02 +0200 Subject: [PATCH 09/17] Version bump --- clearml_session/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml_session/version.py b/clearml_session/version.py index 58913f0..88c09c9 100644 --- a/clearml_session/version.py +++ b/clearml_session/version.py @@ -1 +1 @@ -__version__ = '0.3.5rc1' +__version__ = '0.3.5rc2' From 1d715700c16a8127b8ebdbcbc901de5ce88c580f Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sun, 19 Dec 2021 00:48:24 +0200 Subject: [PATCH 10/17] Add vault support --- clearml_session/interactive_session_task.py | 21 +++++++++++++++++++++ requirements.txt | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/clearml_session/interactive_session_task.py b/clearml_session/interactive_session_task.py index 60b3402..bd23e6a 100644 --- a/clearml_session/interactive_session_task.py +++ b/clearml_session/interactive_session_task.py @@ -582,6 +582,25 @@ def _b64_decode_file(encoded_string): def setup_user_env(param, task): env = setup_os_env(param) + + # apply vault if we have it + vault_environment = {} + if param.get("user_key") and param.get("user_secret"): + # noinspection PyBroadException + try: + print('Applying vault configuration') + from 
clearml.backend_api.session.defs import ENV_ENABLE_ENV_CONFIG_SECTION, ENV_ENABLE_FILES_CONFIG_SECTION + prev_env, prev_files = ENV_ENABLE_ENV_CONFIG_SECTION.get(), ENV_ENABLE_FILES_CONFIG_SECTION.get() + ENV_ENABLE_ENV_CONFIG_SECTION.set(True), ENV_ENABLE_FILES_CONFIG_SECTION.set(True) + prev_envs = deepcopy(os.environ) + Session(api_key=param.get("user_key"), secret_key=param.get("user_secret")) + vault_environment = {k: v for k, v in os.environ.items() if prev_envs.get(k) != v} + ENV_ENABLE_ENV_CONFIG_SECTION.set(prev_env), ENV_ENABLE_FILES_CONFIG_SECTION.set(prev_files) + if vault_environment: + print('Vault environment added: {}'.format(list(vault_environment.keys()))) + except Exception as ex: + print('Applying vault configuration failed: {}'.format(ex)) + # do not change user bash/profile if os.geteuid() != 0: if param.get("user_key") and param.get("user_secret"): @@ -611,6 +630,8 @@ def setup_user_env(param, task): param.get("user_secret", "").replace('$', '\\$'))) os.system("echo 'export CLEARML_DOCKER_IMAGE=\"{}\"' >> ~/.profile".format( param.get("default_docker", "").strip() or env.get('CLEARML_DOCKER_IMAGE', ''))) + for k, v in vault_environment.items(): + os.system("echo 'export {}=\"{}\"' >> ~/.profile".format(k, v)) env['CLEARML_API_ACCESS_KEY'] = param.get("user_key") env['CLEARML_API_SECRET_KEY'] = param.get("user_secret") # set default folder for user diff --git a/requirements.txt b/requirements.txt index 19bab3c..f49ce3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -clearml >= 1.1.4 +clearml >= 1.1.5rc4 pexpect ; sys_platform != 'win32' wexpect ; sys_platform == 'win32' From ff8d97f347eab261adf9fb724d36a66f3023b4ae Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sun, 19 Dec 2021 00:48:41 +0200 Subject: [PATCH 11/17] version bump --- clearml_session/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml_session/version.py b/clearml_session/version.py index 88c09c9..39c472e 100644 --- 
a/clearml_session/version.py +++ b/clearml_session/version.py @@ -1 +1 @@ -__version__ = '0.3.5rc2' +__version__ = '0.3.5rc3' From a5792ed82a65588050cce968e72cd5f745d7ded2 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Wed, 29 Dec 2021 23:06:58 +0200 Subject: [PATCH 12/17] Fix vault environment variable integration --- clearml_session/interactive_session_task.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clearml_session/interactive_session_task.py b/clearml_session/interactive_session_task.py index bd23e6a..bfb5eec 100644 --- a/clearml_session/interactive_session_task.py +++ b/clearml_session/interactive_session_task.py @@ -632,6 +632,8 @@ def setup_user_env(param, task): param.get("default_docker", "").strip() or env.get('CLEARML_DOCKER_IMAGE', ''))) for k, v in vault_environment.items(): os.system("echo 'export {}=\"{}\"' >> ~/.profile".format(k, v)) + os.system("echo 'export {}=\"{}\"' >> ~/.bashrc".format(k, v)) + env[k] = str(v) if v else "" env['CLEARML_API_ACCESS_KEY'] = param.get("user_key") env['CLEARML_API_SECRET_KEY'] = param.get("user_secret") # set default folder for user From 5ab6995fb4bf0d8dbaa64e1bdf3474e3b35327a3 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Wed, 29 Dec 2021 23:07:08 +0200 Subject: [PATCH 13/17] Add user-folder to jupyter-lab / vscode links. Take latest clearml RC --- clearml_session/__main__.py | 6 +++++- clearml_session/version.py | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/clearml_session/__main__.py b/clearml_session/__main__.py index e6c3359..1af33fe 100644 --- a/clearml_session/__main__.py +++ b/clearml_session/__main__.py @@ -149,7 +149,7 @@ def create_base_task(state, project_name=None, task_name=None): task_script['working_dir'] = '.' 
task_script['entry_point'] = 'interactive_session.py' task_script['requirements'] = {'pip': '\n'.join( - ["clearml"] + (["jupyter", "jupyterlab", "jupyterlab_git"] if state.get('jupyter_lab') else []) + + ["clearml>=1.1.5rc6"] + (["jupyter", "jupyterlab", "jupyterlab_git"] if state.get('jupyter_lab') else []) + (['pylint'] if state.get('vscode_server') else []))} section, _, _ = _get_config_section_name() @@ -805,9 +805,13 @@ def monitor_ssh_tunnel(state, task): msg += \ '\nJupyter Lab URL: http://localhost:{local_jupyter_port}/?token={jupyter_token}'.format( local_jupyter_port=local_jupyter_port, jupyter_token=jupyter_token.rstrip()) + if state.get('user_folder'): + msg += "&file-browser-path={}".format(state.get('user_folder')) if vscode_port: msg += '\nVSCode server available at http://localhost:{local_vscode_port}/'.format( local_vscode_port=local_vscode_port) + if state.get('user_folder'): + msg += "?folder={}".format(state.get('user_folder')) print(msg) print('\nConnection is up and running\n' diff --git a/clearml_session/version.py b/clearml_session/version.py index 39c472e..b9a594b 100644 --- a/clearml_session/version.py +++ b/clearml_session/version.py @@ -1 +1 @@ -__version__ = '0.3.5rc3' +__version__ = '0.3.5rc4' From 5edc1a8edf69190147ef40fea3359697d341f284 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 1 Jan 2022 23:18:19 +0200 Subject: [PATCH 14/17] Bump clearml required version to v1.1.5 --- clearml_session/__main__.py | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clearml_session/__main__.py b/clearml_session/__main__.py index 1af33fe..c0d2185 100644 --- a/clearml_session/__main__.py +++ b/clearml_session/__main__.py @@ -149,7 +149,7 @@ def create_base_task(state, project_name=None, task_name=None): task_script['working_dir'] = '.' 
task_script['entry_point'] = 'interactive_session.py' task_script['requirements'] = {'pip': '\n'.join( - ["clearml>=1.1.5rc6"] + (["jupyter", "jupyterlab", "jupyterlab_git"] if state.get('jupyter_lab') else []) + + ["clearml>=1.1.5"] + (["jupyter", "jupyterlab", "jupyterlab_git"] if state.get('jupyter_lab') else []) + (['pylint'] if state.get('vscode_server') else []))} section, _, _ = _get_config_section_name() diff --git a/requirements.txt b/requirements.txt index f49ce3d..76bbc81 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -clearml >= 1.1.5rc4 +clearml >= 1.1.5 pexpect ; sys_platform != 'win32' wexpect ; sys_platform == 'win32' From 3283d55006ce1fc8160ac38a238adf5d1f59d6b0 Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 1 Jan 2022 23:20:21 +0200 Subject: [PATCH 15/17] Update README --- README.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0c6ca17..b15e2c8 100644 --- a/README.md +++ b/README.md @@ -216,7 +216,7 @@ clearml-session --help ``` console clearml-session - CLI for launching JupyterLab / VSCode on a remote machine usage: clearml-session [-h] [--version] [--attach [ATTACH]] - [--debugging-session TASK_ID] [--queue QUEUE] + [--debugging-session DEBUGGING_SESSION] [--queue QUEUE] [--docker DOCKER] [--docker-args DOCKER_ARGS] [--public-ip [true/false]] [--remote-ssh-port REMOTE_SSH_PORT] @@ -231,12 +231,11 @@ usage: clearml-session [-h] [--version] [--attach [ATTACH]] [--config-file CONFIG_FILE] [--remote-gateway [REMOTE_GATEWAY]] [--base-task-id BASE_TASK_ID] [--project PROJECT] - [--disable-keepalive] + [--keepalive [true/false]] [--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]] [--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]] [--skip-docker-network] [--password PASSWORD] - [--username USERNAME] - [--verbose] + [--username USERNAME] [--verbose] clearml-session - CLI for launching JupyterLab / VSCode on a remote machine @@ 
-245,7 +244,7 @@ optional arguments: --version Display the clearml-session utility version --attach [ATTACH] Attach to running interactive session (default: previous session) - --debugging-session TASK_ID + --debugging-session DEBUGGING_SESSION Pass existing Task id (experiment), create a copy of the experiment on a remote machine, and launch jupyter/ssh for interactive access. Example @@ -308,9 +307,10 @@ optional arguments: for the default interactive session --project PROJECT Advanced: Set the project name for the interactive session Task - --disable-keepalive Advanced: If set, disable the transparent proxy always - keeping the sockets alive. Default: false, use - transparent socket mitigating connection drops. + --keepalive [true/false] + Advanced: If set, enables the transparent proxy always + keeping the sockets alive. Default: False, do not use + transparent socket for mitigating connection drops. --queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]] Advanced: Excluded queues with this specific tag from the selection @@ -329,6 +329,5 @@ optional arguments: --verbose Advanced: If set, print verbose progress information, e.g. the remote machine setup process log - Notice! 
all arguments are stored as new defaults for the next session ``` From 6f8eca9f1bb1cf753b2138f60f07ccd410a7fd0f Mon Sep 17 00:00:00 2001 From: allegroai <> Date: Sat, 1 Jan 2022 23:20:31 +0200 Subject: [PATCH 16/17] Version bump to 0.3.5 --- clearml_session/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml_session/version.py b/clearml_session/version.py index b9a594b..40ed83d 100644 --- a/clearml_session/version.py +++ b/clearml_session/version.py @@ -1 +1 @@ -__version__ = '0.3.5rc4' +__version__ = '0.3.5' From 3f6b39471803a35f28a071c58613c24a28f95654 Mon Sep 17 00:00:00 2001 From: saif Date: Wed, 19 Jan 2022 13:13:00 +0000 Subject: [PATCH 17/17] added ldconfig warning suppressing --- clearml_session/interactive_session_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clearml_session/interactive_session_task.py b/clearml_session/interactive_session_task.py index bfb5eec..b89904d 100644 --- a/clearml_session/interactive_session_task.py +++ b/clearml_session/interactive_session_task.py @@ -481,7 +481,7 @@ def setup_ssh_server(hostname, hostnames, param, task): "CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY' >> /etc/ssh/sshd_config && " 'echo "export VISIBLE=now" >> /etc/profile && ' 'echo "export PATH=$PATH" >> /etc/profile && ' - 'echo "ldconfig" >> /etc/profile && ' + 'echo "ldconfig" 2>/dev/null >> /etc/profile && ' 'echo "export CLEARML_CONFIG_FILE={trains_config_file}" >> /etc/profile'.format( password=ssh_password, port=port,