mirror of
https://github.com/clearml/clearml-session
synced 2025-06-26 18:16:55 +00:00
Add --router-enabled to support clearml router service
This commit is contained in:
parent
e0a79f7ce7
commit
90ac85339a
107
README.md
107
README.md
@ -307,21 +307,20 @@ clearml-session --help
|
|||||||
```console
|
```console
|
||||||
clearml-session - CLI for launching JupyterLab / VSCode / SSH on a remote machine
|
clearml-session - CLI for launching JupyterLab / VSCode / SSH on a remote machine
|
||||||
usage: clearml-session [-h] [--version] [--attach [ATTACH]] [--shutdown [SHUTDOWN]] [--shell]
|
usage: clearml-session [-h] [--version] [--attach [ATTACH]] [--shutdown [SHUTDOWN]] [--shell]
|
||||||
[--debugging-session DEBUGGING_SESSION] [--queue QUEUE] [--docker DOCKER]
|
[--debugging-session DEBUGGING_SESSION] [--queue QUEUE] [--router-enabled] [--docker DOCKER]
|
||||||
[--docker-args DOCKER_ARGS] [--public-ip [true/false]] [--remote-ssh-port REMOTE_SSH_PORT]
|
[--docker-args DOCKER_ARGS] [--public-ip [true/false]] [--remote-ssh-port REMOTE_SSH_PORT]
|
||||||
[--vscode-server [true/false]] [--vscode-version VSCODE_VERSION]
|
[--vscode-server [true/false]] [--vscode-version VSCODE_VERSION] [--vscode-extensions VSCODE_EXTENSIONS]
|
||||||
[--vscode-extensions VSCODE_EXTENSIONS] [--jupyter-lab [true/false]]
|
[--jupyter-lab [true/false]] [--upload-files UPLOAD_FILES] [--continue-session CONTINUE_SESSION]
|
||||||
[--upload-files UPLOAD_FILES] [--continue-session CONTINUE_SESSION]
|
[--store-workspace STORE_WORKSPACE] [--git-credentials [true/false]] [--user-folder USER_FOLDER]
|
||||||
[--store-workspace STORE_WORKSPACE] [--git-credentials [true/false]]
|
[--packages [PACKAGES [PACKAGES ...]]] [--requirements REQUIREMENTS] [--init-script [INIT_SCRIPT]]
|
||||||
[--user-folder USER_FOLDER] [--packages [PACKAGES [PACKAGES ...]]]
|
[--config-file CONFIG_FILE] [--remote-gateway [REMOTE_GATEWAY]] [--base-task-id BASE_TASK_ID]
|
||||||
[--requirements REQUIREMENTS] [--init-script [INIT_SCRIPT]] [--config-file CONFIG_FILE]
|
[--project PROJECT] [--session-name SESSION_NAME] [--session-tags [SESSION_TAGS [SESSION_TAGS ...]]]
|
||||||
[--remote-gateway [REMOTE_GATEWAY]] [--base-task-id BASE_TASK_ID] [--project PROJECT]
|
|
||||||
[--session-name SESSION_NAME] [--session-tags [SESSION_TAGS [SESSION_TAGS ...]]]
|
|
||||||
[--disable-session-cleanup [true/false]] [--keepalive [true/false]]
|
[--disable-session-cleanup [true/false]] [--keepalive [true/false]]
|
||||||
[--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]]
|
[--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]]
|
||||||
[--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]]
|
[--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]] [--skip-docker-network [true/false]]
|
||||||
[--skip-docker-network [true/false]] [--password PASSWORD] [--username USERNAME]
|
[--password PASSWORD] [--randomize [RANDOMIZE [RANDOMIZE ...]]] [--username USERNAME]
|
||||||
[--force-dropbear [true/false]] [--verbose] [--yes]
|
[--force-dropbear [true/false]] [--disable-store-defaults] [--disable-fingerprint-check] [--verbose]
|
||||||
|
[--yes]
|
||||||
{list,info,shutdown} ...
|
{list,info,shutdown} ...
|
||||||
|
|
||||||
clearml-session - CLI for launching JupyterLab / VSCode / SSH on a remote machine
|
clearml-session - CLI for launching JupyterLab / VSCode / SSH on a remote machine
|
||||||
@ -338,65 +337,64 @@ optional arguments:
|
|||||||
--attach [ATTACH] Attach to running interactive session (default: previous session)
|
--attach [ATTACH] Attach to running interactive session (default: previous session)
|
||||||
--shutdown [SHUTDOWN], -S [SHUTDOWN]
|
--shutdown [SHUTDOWN], -S [SHUTDOWN]
|
||||||
Shut down an active session (default: previous session)
|
Shut down an active session (default: previous session)
|
||||||
--shell Open the SSH shell session directly, notice quitting the SSH session will Not shut down the
|
--shell Open the SSH shell session directly, notice quitting the SSH session will Not shut down the remote session
|
||||||
remote session
|
|
||||||
--debugging-session DEBUGGING_SESSION
|
--debugging-session DEBUGGING_SESSION
|
||||||
Pass existing Task id (experiment), create a copy of the experiment on a remote machine,
|
Pass existing Task id (experiment), create a copy of the experiment on a remote machine, and launch
|
||||||
and launch jupyter/ssh for interactive access. Example --debugging-session <task_id>
|
jupyter/ssh for interactive access. Example --debugging-session <task_id>
|
||||||
--queue QUEUE Select the queue to launch the interactive session on (default: previously used queue)
|
--queue QUEUE Select the queue to launch the interactive session on (default: previously used queue)
|
||||||
--docker DOCKER Select the docker image to use in the interactive session (default: previously used
|
--router-enabled If we have a clearml Router set, make sure we request direct TCP routing to our container.
|
||||||
docker image or `nvidia/cuda:11.6.2-runtime-ubuntu20.04`)
|
--docker DOCKER Select the docker image to use in the interactive session on (default: previously used docker image or
|
||||||
|
`nvidia/cuda:11.6.2-runtime-ubuntu20.04`)
|
||||||
--docker-args DOCKER_ARGS
|
--docker-args DOCKER_ARGS
|
||||||
Add additional arguments for the docker image to use in the interactive session on
|
Add additional arguments for the docker image to use in the interactive session on (default: previously
|
||||||
(default: previously used docker-args)
|
used docker-args)
|
||||||
--public-ip [true/false]
|
--public-ip [true/false]
|
||||||
If True, register the public IP of the remote machine. Set if running on the cloud.
|
If True, register the public IP of the remote machine. Set if running on the cloud. Default: false (use
|
||||||
Default: false (use for local / on-premises)
|
for local / on-premises)
|
||||||
--remote-ssh-port REMOTE_SSH_PORT
|
--remote-ssh-port REMOTE_SSH_PORT
|
||||||
Set the remote ssh server port, running on the agent's machine. (default: 10022)
|
Set the remote ssh server port, running on the agent's machine. (default: 10022)
|
||||||
--vscode-server [true/false]
|
--vscode-server [true/false]
|
||||||
Install vscode server (code-server) on interactive session (default: true)
|
Install vscode server (code-server) on interactive session (default: true)
|
||||||
--vscode-version VSCODE_VERSION
|
--vscode-version VSCODE_VERSION
|
||||||
Set vscode server (code-server) version, as well as vscode python extension version
|
Set vscode server (code-server) version, as well as vscode python extension version <vscode:python-ext>
|
||||||
<vscode:python-ext> (example: "3.7.4:2020.10.332292344")
|
(example: "3.7.4:2020.10.332292344")
|
||||||
--vscode-extensions VSCODE_EXTENSIONS
|
--vscode-extensions VSCODE_EXTENSIONS
|
||||||
Install additional vscode extensions, as well as vscode python extension (example: "ms-
|
Install additional vscode extensions, as well as vscode python extension (example: "ms-python.python,ms-
|
||||||
python.python,ms-python.black-formatter,ms-python.pylint,ms-python.flake8")
|
python.black-formatter,ms-python.pylint,ms-python.flake8")
|
||||||
--jupyter-lab [true/false]
|
--jupyter-lab [true/false]
|
||||||
Install Jupyter-Lab on interactive session (default: true)
|
Install Jupyter-Lab on interactive session (default: true)
|
||||||
--upload-files UPLOAD_FILES
|
--upload-files UPLOAD_FILES
|
||||||
Advanced: Upload local files/folders to the remote session. Example: `/my/local/data/`
|
Advanced: Upload local files/folders to the remote session. Example: `/my/local/data/` will upload the
|
||||||
will upload the local folder and extract it into the container in ~/session-files/
|
local folder and extract it into the container in ~/session-files/
|
||||||
--continue-session CONTINUE_SESSION
|
--continue-session CONTINUE_SESSION
|
||||||
Continue previous session (ID provided) restoring your workspace (see --store-workspace)
|
Continue previous session (ID provided) restoring your workspace (see --store-workspace)
|
||||||
--store-workspace STORE_WORKSPACE
|
--store-workspace STORE_WORKSPACE
|
||||||
Upload/Restore remote workspace folder. Example: `~/workspace/` will automatically
|
Upload/Restore remote workspace folder. Example: `~/workspace/` will automatically restore/store the
|
||||||
restore/store the *containers* folder and extract it into the next session. Use with
|
*containers* folder and extract it into the next session. Use with --continue-session to continue your
|
||||||
--continue-session to continue your previous work from your exact container state
|
previous work from your exact container state
|
||||||
--git-credentials [true/false]
|
--git-credentials [true/false]
|
||||||
If true, local .git-credentials file is sent to the interactive session. (default: false)
|
If true, local .git-credentials file is sent to the interactive session. (default: false)
|
||||||
--user-folder USER_FOLDER
|
--user-folder USER_FOLDER
|
||||||
Advanced: Set the remote base folder (default: ~/)
|
Advanced: Set the remote base folder (default: ~/)
|
||||||
--packages [PACKAGES [PACKAGES ...]]
|
--packages [PACKAGES [PACKAGES ...]]
|
||||||
Additional packages to add, supports version numbers (default: previously added packages).
|
Additional packages to add, supports version numbers (default: previously added packages). examples:
|
||||||
examples: --packages torch==1.7 tqdm
|
--packages torch==1.7 tqdm
|
||||||
--requirements REQUIREMENTS
|
--requirements REQUIREMENTS
|
||||||
Specify requirements.txt file to install when setting the interactive session.
|
Specify requirements.txt file to install when setting the interactive session. Requirements file is read
|
||||||
Requirements file is read and stored in `packages` section as default for the next
|
and stored in `packages` section as default for the next sessions. Can be overridden by calling
|
||||||
sessions. Can be overridden by calling `--packages`
|
`--packages`
|
||||||
--init-script [INIT_SCRIPT]
|
--init-script [INIT_SCRIPT]
|
||||||
Specify BASH init script file to be executed when setting the interactive session. Script
|
Specify BASH init script file to be executed when setting the interactive session. Script content is
|
||||||
content is read and stored as default script for the next sessions. To clear the init-
|
read and stored as default script for the next sessions. To clear the init-script do not pass a file
|
||||||
script do not pass a file
|
|
||||||
--config-file CONFIG_FILE
|
--config-file CONFIG_FILE
|
||||||
Advanced: Change the configuration file used to store the previous state (default:
|
Advanced: Change the configuration file used to store the previous state (default:
|
||||||
~/.clearml_session.json)
|
~/.clearml_session.json)
|
||||||
--remote-gateway [REMOTE_GATEWAY]
|
--remote-gateway [REMOTE_GATEWAY]
|
||||||
Advanced: Specify gateway ip/address:port to be passed to interactive session (for use
|
Advanced: Specify gateway ip/address:port to be passed to interactive session (for use with k8s
|
||||||
with k8s ingestion / ELB)
|
ingestion / ELB)
|
||||||
--base-task-id BASE_TASK_ID
|
--base-task-id BASE_TASK_ID
|
||||||
Advanced: Set the base task ID for the interactive session. (default: previously used
|
Advanced: Set the base task ID for the interactive session. (default: previously used Task). Use `none`
|
||||||
Task). Use `none` for the default interactive session
|
for the default interactive session
|
||||||
--project PROJECT Advanced: Set the project name for the interactive session Task
|
--project PROJECT Advanced: Set the project name for the interactive session Task
|
||||||
--session-name SESSION_NAME
|
--session-name SESSION_NAME
|
||||||
Advanced: Set the name of the interactive session Task
|
Advanced: Set the name of the interactive session Task
|
||||||
@ -405,33 +403,32 @@ optional arguments:
|
|||||||
--disable-session-cleanup [true/false]
|
--disable-session-cleanup [true/false]
|
||||||
Advanced: If set, previous interactive sessions are not deleted
|
Advanced: If set, previous interactive sessions are not deleted
|
||||||
--keepalive [true/false]
|
--keepalive [true/false]
|
||||||
Advanced: If set, enables the transparent proxy always keeping the sockets alive. Default:
|
Advanced: If set, enables the transparent proxy always keeping the sockets alive. Default: False, do not
|
||||||
False, do not use transparent sockets for mitigating connection drops.
|
use transparent socket for mitigating connection drops.
|
||||||
--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]
|
--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]
|
||||||
Advanced: Exclude queues with this specific tag from the selection
|
Advanced: Exclude queues with this specific tag from the selection
|
||||||
--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]
|
--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]
|
||||||
Advanced: Only include queues with this specific tag from the selection
|
Advanced: Only include queues with this specific tag from the selection
|
||||||
--skip-docker-network [true/false]
|
--skip-docker-network [true/false]
|
||||||
Advanced: If set, `--network host` is **not** passed to docker (assumes k8s network
|
Advanced: If set, `--network host` is **not** passed to docker (assumes k8s network ingestion) (default:
|
||||||
ingestion) (default: false)
|
false)
|
||||||
--password PASSWORD Advanced: Select ssh password for the interactive session (default: `randomly-generated`
|
--password PASSWORD Advanced: Select ssh password for the interactive session (default: `randomly-generated` or previously
|
||||||
or previously used one)
|
|
||||||
--username USERNAME Advanced: Select ssh username for the interactive session (default: `root` or previously
|
|
||||||
used one)
|
used one)
|
||||||
--randomize Advanced: Recreate a new random ssh password for the interactive session options:
|
--randomize [RANDOMIZE [RANDOMIZE ...]]
|
||||||
`--randomize` one time recreate, --randomize `always` create a new random password for
|
Advanced: Recreate a new random ssh password for the interactive session options: `--randomize` one time
|
||||||
every session
|
recreate random password, --randomize `always` create a new random password for every session
|
||||||
|
--username USERNAME Advanced: Select ssh username for the interactive session (default: `root` or previously used one)
|
||||||
--force-dropbear [true/false]
|
--force-dropbear [true/false]
|
||||||
Force using `dropbear` instead of SSHd
|
Force using `dropbear` instead of SSHd
|
||||||
--disable-store-defaults
|
--disable-store-defaults
|
||||||
If set, do not store current setup as new default configuration
|
If set, do not store current setup as new default configuration
|
||||||
--disable-fingerprint-check
|
--disable-fingerprint-check
|
||||||
Advanced: If set, ignore the remote SSH server fingerprint check
|
Advanced: If set, ignore the remote SSH server fingerprint check
|
||||||
--verbose Advanced: If set, print verbose progress information, e.g. the remote machine setup
|
--verbose Advanced: If set, print verbose progress information, e.g. the remote machine setup process log
|
||||||
process log
|
|
||||||
--yes, -y Automatic yes to prompts; assume "yes" as answer to all prompts and run non-interactively
|
--yes, -y Automatic yes to prompts; assume "yes" as answer to all prompts and run non-interactively
|
||||||
|
|
||||||
Notice! all arguments are stored as new defaults for the next execution
|
Notice! all arguments are stored as new defaults for the next execution
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
@ -715,6 +715,7 @@ def clone_task(state, project_id=None):
|
|||||||
task_params["{}/force_dropbear".format(section)] = bool(state.get('force_dropbear'))
|
task_params["{}/force_dropbear".format(section)] = bool(state.get('force_dropbear'))
|
||||||
task_params["{}/store_workspace".format(section)] = state.get('store_workspace')
|
task_params["{}/store_workspace".format(section)] = state.get('store_workspace')
|
||||||
task_params["{}/use_ssh_proxy".format(section)] = state.get('keepalive')
|
task_params["{}/use_ssh_proxy".format(section)] = state.get('keepalive')
|
||||||
|
task_params["{}/router_enabled".format(section)] = bool(state.get('router_enabled'))
|
||||||
if state.get('user_folder'):
|
if state.get('user_folder'):
|
||||||
task_params['{}/user_base_directory'.format(section)] = state.get('user_folder')
|
task_params['{}/user_base_directory'.format(section)] = state.get('user_folder')
|
||||||
docker = state.get('docker') or task.get_base_docker()
|
docker = state.get('docker') or task.get_base_docker()
|
||||||
@ -977,7 +978,8 @@ def start_ssh_tunnel(username, remote_address, ssh_port, ssh_password, local_rem
|
|||||||
if debug:
|
if debug:
|
||||||
print("ERROR: running local SSH client [{}] failed connecting to {}: {}".format(command, args, ex))
|
print("ERROR: running local SSH client [{}] failed connecting to {}: {}".format(command, args, ex))
|
||||||
else:
|
else:
|
||||||
print("ERROR: running local SSH client failed connecting to {}: {}".format(remote_address, ex))
|
print("ERROR: running local SSH client failed connecting to {} [{}]\n"
|
||||||
|
" for additional details re-run with --verbose".format(remote_address, type(ex)))
|
||||||
|
|
||||||
if child:
|
if child:
|
||||||
child.terminate(force=True)
|
child.terminate(force=True)
|
||||||
@ -1165,6 +1167,8 @@ def monitor_ssh_tunnel(state, task, ssh_setup_completed_callback=None):
|
|||||||
else:
|
else:
|
||||||
logging.getLogger().warning('SSH tunneling failed, retrying in {} seconds'.format(3))
|
logging.getLogger().warning('SSH tunneling failed, retrying in {} seconds'.format(3))
|
||||||
sleep(3.)
|
sleep(3.)
|
||||||
|
# clear ssh port, so that we reload it from Task (i.e. sync with router if it's there)
|
||||||
|
ssh_port = None
|
||||||
continue
|
continue
|
||||||
|
|
||||||
connect_state['reconnect'] = False
|
connect_state['reconnect'] = False
|
||||||
@ -1355,6 +1359,10 @@ def setup_parser(parser):
|
|||||||
'and launch jupyter/ssh for interactive access. Example --debugging-session <task_id>')
|
'and launch jupyter/ssh for interactive access. Example --debugging-session <task_id>')
|
||||||
parser.add_argument('--queue', type=str, default=None,
|
parser.add_argument('--queue', type=str, default=None,
|
||||||
help='Select the queue to launch the interactive session on (default: previously used queue)')
|
help='Select the queue to launch the interactive session on (default: previously used queue)')
|
||||||
|
parser.add_argument("--router-enabled", default=None, nargs='?', const='true', metavar='true/false',
|
||||||
|
type=lambda x: (str(x).strip().lower() in ('true', 'yes')),
|
||||||
|
help="If we have a clearml Router set, make sure we request direct TCP routing "
|
||||||
|
"to our container. ")
|
||||||
parser.add_argument('--docker', type=str, default=None,
|
parser.add_argument('--docker', type=str, default=None,
|
||||||
help='Select the docker image to use in the interactive session on '
|
help='Select the docker image to use in the interactive session on '
|
||||||
'(default: previously used docker image or `{}`)'.format(default_docker_image))
|
'(default: previously used docker image or `{}`)'.format(default_docker_image))
|
||||||
|
@ -591,6 +591,7 @@ def setup_ssh_server(hostname, hostnames, param, task, env):
|
|||||||
print("Installing SSH Server on {} [{}]".format(hostname, hostnames))
|
print("Installing SSH Server on {} [{}]".format(hostname, hostnames))
|
||||||
ssh_password = param.get("ssh_password", "training")
|
ssh_password = param.get("ssh_password", "training")
|
||||||
|
|
||||||
|
proxy_port = port = None
|
||||||
ssh_port = None
|
ssh_port = None
|
||||||
if Session.check_min_api_version("2.13"):
|
if Session.check_min_api_version("2.13"):
|
||||||
try:
|
try:
|
||||||
@ -797,6 +798,8 @@ def setup_ssh_server(hostname, hostnames, param, task, env):
|
|||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print("Error: {}\n\n#\n# Error: SSH server could not be launched\n#\n".format(ex))
|
print("Error: {}\n\n#\n# Error: SSH server could not be launched\n#\n".format(ex))
|
||||||
|
|
||||||
|
return proxy_port or port
|
||||||
|
|
||||||
|
|
||||||
def _b64_decode_file(encoded_string):
|
def _b64_decode_file(encoded_string):
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
@ -1019,6 +1022,7 @@ def get_host_name(task, param):
|
|||||||
# update host name
|
# update host name
|
||||||
if (not task.get_parameter(name='properties/external_address') and
|
if (not task.get_parameter(name='properties/external_address') and
|
||||||
not task.get_parameter(name='properties/k8s-gateway-address')):
|
not task.get_parameter(name='properties/k8s-gateway-address')):
|
||||||
|
|
||||||
if task._get_runtime_properties().get("external_address"):
|
if task._get_runtime_properties().get("external_address"):
|
||||||
external_addr = task._get_runtime_properties().get("external_address")
|
external_addr = task._get_runtime_properties().get("external_address")
|
||||||
else:
|
else:
|
||||||
@ -1274,9 +1278,15 @@ def main():
|
|||||||
"force_dropbear": False,
|
"force_dropbear": False,
|
||||||
"store_workspace": None,
|
"store_workspace": None,
|
||||||
"use_ssh_proxy": False,
|
"use_ssh_proxy": False,
|
||||||
|
"router_enabled": False,
|
||||||
}
|
}
|
||||||
task = init_task(param, default_ssh_fingerprint)
|
task = init_task(param, default_ssh_fingerprint)
|
||||||
|
|
||||||
|
# if router is enabled, do not request a public IP, enforce local IP
|
||||||
|
if param.get("router_enabled") and param.get("public_ip"):
|
||||||
|
print("External TCP router configured, disabling `public_ip` request")
|
||||||
|
param["public_ip"] = False
|
||||||
|
|
||||||
run_user_init_script(task)
|
run_user_init_script(task)
|
||||||
|
|
||||||
# restore workspace if exists
|
# restore workspace if exists
|
||||||
@ -1294,7 +1304,20 @@ def main():
|
|||||||
|
|
||||||
env = setup_user_env(param, task)
|
env = setup_user_env(param, task)
|
||||||
|
|
||||||
setup_ssh_server(hostname, hostnames, param, task, env)
|
ssh_port = setup_ssh_server(hostname, hostnames, param, task, env)
|
||||||
|
|
||||||
|
# make sure we set it to the runtime properties
|
||||||
|
if ssh_port and param.get("router_enabled"):
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
address = task._get_runtime_properties().get("external_address") or ""
|
||||||
|
print("Requesting TCP route from router ingress to {} port {}".format(address, ssh_port))
|
||||||
|
# noinspection PyProtectedMember
|
||||||
|
task._set_runtime_properties({
|
||||||
|
"external_address": address,
|
||||||
|
"external_tcp_port": ssh_port,
|
||||||
|
"_SERVICE": "EXTERNAL_TCP",
|
||||||
|
})
|
||||||
|
task.set_system_tags((task.get_system_tags() or []) + ["external_service"])
|
||||||
|
|
||||||
start_vscode_server(hostname, hostnames, param, task, env)
|
start_vscode_server(hostname, hostnames, param, task, env)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user