mirror of
https://github.com/clearml/clearml-session
synced 2025-01-30 18:36:54 +00:00
Add --router-enabled to support clearml router service
This commit is contained in:
parent
e0a79f7ce7
commit
90ac85339a
107
README.md
107
README.md
@ -307,21 +307,20 @@ clearml-session --help
|
||||
```console
|
||||
clearml-session - CLI for launching JupyterLab / VSCode / SSH on a remote machine
|
||||
usage: clearml-session [-h] [--version] [--attach [ATTACH]] [--shutdown [SHUTDOWN]] [--shell]
|
||||
[--debugging-session DEBUGGING_SESSION] [--queue QUEUE] [--docker DOCKER]
|
||||
[--debugging-session DEBUGGING_SESSION] [--queue QUEUE] [--router-enabled] [--docker DOCKER]
|
||||
[--docker-args DOCKER_ARGS] [--public-ip [true/false]] [--remote-ssh-port REMOTE_SSH_PORT]
|
||||
[--vscode-server [true/false]] [--vscode-version VSCODE_VERSION]
|
||||
[--vscode-extensions VSCODE_EXTENSIONS] [--jupyter-lab [true/false]]
|
||||
[--upload-files UPLOAD_FILES] [--continue-session CONTINUE_SESSION]
|
||||
[--store-workspace STORE_WORKSPACE] [--git-credentials [true/false]]
|
||||
[--user-folder USER_FOLDER] [--packages [PACKAGES [PACKAGES ...]]]
|
||||
[--requirements REQUIREMENTS] [--init-script [INIT_SCRIPT]] [--config-file CONFIG_FILE]
|
||||
[--remote-gateway [REMOTE_GATEWAY]] [--base-task-id BASE_TASK_ID] [--project PROJECT]
|
||||
[--session-name SESSION_NAME] [--session-tags [SESSION_TAGS [SESSION_TAGS ...]]]
|
||||
[--vscode-server [true/false]] [--vscode-version VSCODE_VERSION] [--vscode-extensions VSCODE_EXTENSIONS]
|
||||
[--jupyter-lab [true/false]] [--upload-files UPLOAD_FILES] [--continue-session CONTINUE_SESSION]
|
||||
[--store-workspace STORE_WORKSPACE] [--git-credentials [true/false]] [--user-folder USER_FOLDER]
|
||||
[--packages [PACKAGES [PACKAGES ...]]] [--requirements REQUIREMENTS] [--init-script [INIT_SCRIPT]]
|
||||
[--config-file CONFIG_FILE] [--remote-gateway [REMOTE_GATEWAY]] [--base-task-id BASE_TASK_ID]
|
||||
[--project PROJECT] [--session-name SESSION_NAME] [--session-tags [SESSION_TAGS [SESSION_TAGS ...]]]
|
||||
[--disable-session-cleanup [true/false]] [--keepalive [true/false]]
|
||||
[--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]]
|
||||
[--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]]
|
||||
[--skip-docker-network [true/false]] [--password PASSWORD] [--username USERNAME]
|
||||
[--force-dropbear [true/false]] [--verbose] [--yes]
|
||||
[--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]] [--skip-docker-network [true/false]]
|
||||
[--password PASSWORD] [--randomize [RANDOMIZE [RANDOMIZE ...]]] [--username USERNAME]
|
||||
[--force-dropbear [true/false]] [--disable-store-defaults] [--disable-fingerprint-check] [--verbose]
|
||||
[--yes]
|
||||
{list,info,shutdown} ...
|
||||
|
||||
clearml-session - CLI for launching JupyterLab / VSCode / SSH on a remote machine
|
||||
@ -338,65 +337,64 @@ optional arguments:
|
||||
--attach [ATTACH] Attach to running interactive session (default: previous session)
|
||||
--shutdown [SHUTDOWN], -S [SHUTDOWN]
|
||||
Shut down an active session (default: previous session)
|
||||
--shell Open the SSH shell session directly, notice quitting the SSH session will Not shut down the
|
||||
remote session
|
||||
--shell Open the SSH shell session directly, notice quitting the SSH session will Not shut down the remote session
|
||||
--debugging-session DEBUGGING_SESSION
|
||||
Pass existing Task id (experiment), create a copy of the experiment on a remote machine,
|
||||
and launch jupyter/ssh for interactive access. Example --debugging-session <task_id>
|
||||
Pass existing Task id (experiment), create a copy of the experiment on a remote machine, and launch
|
||||
jupyter/ssh for interactive access. Example --debugging-session <task_id>
|
||||
--queue QUEUE Select the queue to launch the interactive session on (default: previously used queue)
|
||||
--docker DOCKER Select the docker image to use in the interactive session (default: previously used
|
||||
docker image or `nvidia/cuda:11.6.2-runtime-ubuntu20.04`)
|
||||
--router-enabled If we have a clearml Router set, make sure we request direct TCP routing to our container.
|
||||
--docker DOCKER Select the docker image to use in the interactive session on (default: previously used docker image or
|
||||
`nvidia/cuda:11.6.2-runtime-ubuntu20.04`)
|
||||
--docker-args DOCKER_ARGS
|
||||
Add additional arguments for the docker image to use in the interactive session on
|
||||
(default: previously used docker-args)
|
||||
Add additional arguments for the docker image to use in the interactive session on (default: previously
|
||||
used docker-args)
|
||||
--public-ip [true/false]
|
||||
If True, register the public IP of the remote machine. Set if running on the cloud.
|
||||
Default: false (use for local / on-premises)
|
||||
If True, register the public IP of the remote machine. Set if running on the cloud. Default: false (use
|
||||
for local / on-premises)
|
||||
--remote-ssh-port REMOTE_SSH_PORT
|
||||
Set the remote ssh server port, running on the agent's machine. (default: 10022)
|
||||
--vscode-server [true/false]
|
||||
Install vscode server (code-server) on interactive session (default: true)
|
||||
--vscode-version VSCODE_VERSION
|
||||
Set vscode server (code-server) version, as well as vscode python extension version
|
||||
<vscode:python-ext> (example: "3.7.4:2020.10.332292344")
|
||||
Set vscode server (code-server) version, as well as vscode python extension version <vscode:python-ext>
|
||||
(example: "3.7.4:2020.10.332292344")
|
||||
--vscode-extensions VSCODE_EXTENSIONS
|
||||
Install additional vscode extensions, as well as vscode python extension (example: "ms-
|
||||
python.python,ms-python.black-formatter,ms-python.pylint,ms-python.flake8")
|
||||
Install additional vscode extensions, as well as vscode python extension (example: "ms-python.python,ms-
|
||||
python.black-formatter,ms-python.pylint,ms-python.flake8")
|
||||
--jupyter-lab [true/false]
|
||||
Install Jupyter-Lab on interactive session (default: true)
|
||||
--upload-files UPLOAD_FILES
|
||||
Advanced: Upload local files/folders to the remote session. Example: `/my/local/data/`
|
||||
will upload the local folder and extract it into the container in ~/session-files/
|
||||
Advanced: Upload local files/folders to the remote session. Example: `/my/local/data/` will upload the
|
||||
local folder and extract it into the container in ~/session-files/
|
||||
--continue-session CONTINUE_SESSION
|
||||
Continue previous session (ID provided) restoring your workspace (see --store-workspace)
|
||||
--store-workspace STORE_WORKSPACE
|
||||
Upload/Restore remote workspace folder. Example: `~/workspace/` will automatically
|
||||
restore/store the *containers* folder and extract it into the next session. Use with
|
||||
--continue-session to continue your previous work from your exact container state
|
||||
Upload/Restore remote workspace folder. Example: `~/workspace/` will automatically restore/store the
|
||||
*containers* folder and extract it into the next session. Use with --continue-session to continue your
|
||||
previous work from your exact container state
|
||||
--git-credentials [true/false]
|
||||
If true, local .git-credentials file is sent to the interactive session. (default: false)
|
||||
--user-folder USER_FOLDER
|
||||
Advanced: Set the remote base folder (default: ~/)
|
||||
--packages [PACKAGES [PACKAGES ...]]
|
||||
Additional packages to add, supports version numbers (default: previously added packages).
|
||||
examples: --packages torch==1.7 tqdm
|
||||
Additional packages to add, supports version numbers (default: previously added packages). examples:
|
||||
--packages torch==1.7 tqdm
|
||||
--requirements REQUIREMENTS
|
||||
Specify requirements.txt file to install when setting the interactive session.
|
||||
Requirements file is read and stored in `packages` section as default for the next
|
||||
sessions. Can be overridden by calling `--packages`
|
||||
Specify requirements.txt file to install when setting the interactive session. Requirements file is read
|
||||
and stored in `packages` section as default for the next sessions. Can be overridden by calling
|
||||
`--packages`
|
||||
--init-script [INIT_SCRIPT]
|
||||
Specify BASH init script file to be executed when setting the interactive session. Script
|
||||
content is read and stored as default script for the next sessions. To clear the init-
|
||||
script do not pass a file
|
||||
Specify BASH init script file to be executed when setting the interactive session. Script content is
|
||||
read and stored as default script for the next sessions. To clear the init-script do not pass a file
|
||||
--config-file CONFIG_FILE
|
||||
Advanced: Change the configuration file used to store the previous state (default:
|
||||
~/.clearml_session.json)
|
||||
--remote-gateway [REMOTE_GATEWAY]
|
||||
Advanced: Specify gateway ip/address:port to be passed to interactive session (for use
|
||||
with k8s ingestion / ELB)
|
||||
Advanced: Specify gateway ip/address:port to be passed to interactive session (for use with k8s
|
||||
ingestion / ELB)
|
||||
--base-task-id BASE_TASK_ID
|
||||
Advanced: Set the base task ID for the interactive session. (default: previously used
|
||||
Task). Use `none` for the default interactive session
|
||||
Advanced: Set the base task ID for the interactive session. (default: previously used Task). Use `none`
|
||||
for the default interactive session
|
||||
--project PROJECT Advanced: Set the project name for the interactive session Task
|
||||
--session-name SESSION_NAME
|
||||
Advanced: Set the name of the interactive session Task
|
||||
@ -405,33 +403,32 @@ optional arguments:
|
||||
--disable-session-cleanup [true/false]
|
||||
Advanced: If set, previous interactive sessions are not deleted
|
||||
--keepalive [true/false]
|
||||
Advanced: If set, enables the transparent proxy always keeping the sockets alive. Default:
|
||||
False, do not use transparent sockets for mitigating connection drops.
|
||||
Advanced: If set, enables the transparent proxy always keeping the sockets alive. Default: False, do not
|
||||
use transparent socket for mitigating connection drops.
|
||||
--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]
|
||||
Advanced: Exclude queues with this specific tag from the selection
|
||||
--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]
|
||||
Advanced: Only include queues with this specific tag from the selection
|
||||
--skip-docker-network [true/false]
|
||||
Advanced: If set, `--network host` is **not** passed to docker (assumes k8s network
|
||||
ingestion) (default: false)
|
||||
--password PASSWORD Advanced: Select ssh password for the interactive session (default: `randomly-generated`
|
||||
or previously used one)
|
||||
--username USERNAME Advanced: Select ssh username for the interactive session (default: `root` or previously
|
||||
Advanced: If set, `--network host` is **not** passed to docker (assumes k8s network ingestion) (default:
|
||||
false)
|
||||
--password PASSWORD Advanced: Select ssh password for the interactive session (default: `randomly-generated` or previously
|
||||
used one)
|
||||
--randomize Advanced: Recreate a new random ssh password for the interactive session options:
|
||||
`--randomize` one time recreate, --randomize `always` create a new random password for
|
||||
every session
|
||||
--randomize [RANDOMIZE [RANDOMIZE ...]]
|
||||
Advanced: Recreate a new random ssh password for the interactive session options: `--randomize` one time
|
||||
recreate random password, --randomize `always` create a new random password for every session
|
||||
--username USERNAME Advanced: Select ssh username for the interactive session (default: `root` or previously used one)
|
||||
--force-dropbear [true/false]
|
||||
Force using `dropbear` instead of SSHd
|
||||
--disable-store-defaults
|
||||
If set, do not store current setup as new default configuration
|
||||
--disable-fingerprint-check
|
||||
Advanced: If set, ignore the remote SSH server fingerprint check
|
||||
--verbose Advanced: If set, print verbose progress information, e.g. the remote machine setup
|
||||
process log
|
||||
--verbose Advanced: If set, print verbose progress information, e.g. the remote machine setup process log
|
||||
--yes, -y Automatic yes to prompts; assume "yes" as answer to all prompts and run non-interactively
|
||||
|
||||
Notice! all arguments are stored as new defaults for the next execution
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
@ -715,6 +715,7 @@ def clone_task(state, project_id=None):
|
||||
task_params["{}/force_dropbear".format(section)] = bool(state.get('force_dropbear'))
|
||||
task_params["{}/store_workspace".format(section)] = state.get('store_workspace')
|
||||
task_params["{}/use_ssh_proxy".format(section)] = state.get('keepalive')
|
||||
task_params["{}/router_enabled".format(section)] = bool(state.get('router_enabled'))
|
||||
if state.get('user_folder'):
|
||||
task_params['{}/user_base_directory'.format(section)] = state.get('user_folder')
|
||||
docker = state.get('docker') or task.get_base_docker()
|
||||
@ -977,7 +978,8 @@ def start_ssh_tunnel(username, remote_address, ssh_port, ssh_password, local_rem
|
||||
if debug:
|
||||
print("ERROR: running local SSH client [{}] failed connecting to {}: {}".format(command, args, ex))
|
||||
else:
|
||||
print("ERROR: running local SSH client failed connecting to {}: {}".format(remote_address, ex))
|
||||
print("ERROR: running local SSH client failed connecting to {} [{}]\n"
|
||||
" for additional details re-run with --verbose".format(remote_address, type(ex)))
|
||||
|
||||
if child:
|
||||
child.terminate(force=True)
|
||||
@ -1165,6 +1167,8 @@ def monitor_ssh_tunnel(state, task, ssh_setup_completed_callback=None):
|
||||
else:
|
||||
logging.getLogger().warning('SSH tunneling failed, retrying in {} seconds'.format(3))
|
||||
sleep(3.)
|
||||
# clear ssh port, so that we reload it from Task (i.e. sync with router if it's there)
|
||||
ssh_port = None
|
||||
continue
|
||||
|
||||
connect_state['reconnect'] = False
|
||||
@ -1355,6 +1359,10 @@ def setup_parser(parser):
|
||||
'and launch jupyter/ssh for interactive access. Example --debugging-session <task_id>')
|
||||
parser.add_argument('--queue', type=str, default=None,
|
||||
help='Select the queue to launch the interactive session on (default: previously used queue)')
|
||||
parser.add_argument("--router-enabled", default=None, nargs='?', const='true', metavar='true/false',
|
||||
type=lambda x: (str(x).strip().lower() in ('true', 'yes')),
|
||||
help="If we have a clearml Router set, make sure we request direct TCP routing "
|
||||
"to our container. ")
|
||||
parser.add_argument('--docker', type=str, default=None,
|
||||
help='Select the docker image to use in the interactive session on '
|
||||
'(default: previously used docker image or `{}`)'.format(default_docker_image))
|
||||
|
@ -591,6 +591,7 @@ def setup_ssh_server(hostname, hostnames, param, task, env):
|
||||
print("Installing SSH Server on {} [{}]".format(hostname, hostnames))
|
||||
ssh_password = param.get("ssh_password", "training")
|
||||
|
||||
proxy_port = port = None
|
||||
ssh_port = None
|
||||
if Session.check_min_api_version("2.13"):
|
||||
try:
|
||||
@ -797,6 +798,8 @@ def setup_ssh_server(hostname, hostnames, param, task, env):
|
||||
except Exception as ex:
|
||||
print("Error: {}\n\n#\n# Error: SSH server could not be launched\n#\n".format(ex))
|
||||
|
||||
return proxy_port or port
|
||||
|
||||
|
||||
def _b64_decode_file(encoded_string):
|
||||
# noinspection PyBroadException
|
||||
@ -1019,6 +1022,7 @@ def get_host_name(task, param):
|
||||
# update host name
|
||||
if (not task.get_parameter(name='properties/external_address') and
|
||||
not task.get_parameter(name='properties/k8s-gateway-address')):
|
||||
|
||||
if task._get_runtime_properties().get("external_address"):
|
||||
external_addr = task._get_runtime_properties().get("external_address")
|
||||
else:
|
||||
@ -1274,9 +1278,15 @@ def main():
|
||||
"force_dropbear": False,
|
||||
"store_workspace": None,
|
||||
"use_ssh_proxy": False,
|
||||
"router_enabled": False,
|
||||
}
|
||||
task = init_task(param, default_ssh_fingerprint)
|
||||
|
||||
# if router is enabled, do not request a public IP, enforce local IP
|
||||
if param.get("router_enabled") and param.get("public_ip"):
|
||||
print("External TCP router configured, disabling `public_ip` request")
|
||||
param["public_ip"] = False
|
||||
|
||||
run_user_init_script(task)
|
||||
|
||||
# restore workspace if exists
|
||||
@ -1294,7 +1304,20 @@ def main():
|
||||
|
||||
env = setup_user_env(param, task)
|
||||
|
||||
setup_ssh_server(hostname, hostnames, param, task, env)
|
||||
ssh_port = setup_ssh_server(hostname, hostnames, param, task, env)
|
||||
|
||||
# make sure we set it to the runtime properties
|
||||
if ssh_port and param.get("router_enabled"):
|
||||
# noinspection PyProtectedMember
|
||||
address = task._get_runtime_properties().get("external_address") or ""
|
||||
print("Requesting TCP route from router ingress to {} port {}".format(address, ssh_port))
|
||||
# noinspection PyProtectedMember
|
||||
task._set_runtime_properties({
|
||||
"external_address": address,
|
||||
"external_tcp_port": ssh_port,
|
||||
"_SERVICE": "EXTERNAL_TCP",
|
||||
})
|
||||
task.set_system_tags((task.get_system_tags() or []) + ["external_service"])
|
||||
|
||||
start_vscode_server(hostname, hostnames, param, task, env)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user