From 3f3cb50d45edfb473d5a5bd408ed01f10592007c Mon Sep 17 00:00:00 2001
From: allegroai <>
Date: Thu, 7 Mar 2024 11:24:11 +0200
Subject: [PATCH] Add Workspace Synchronization

- Update default docker to nvidia/cuda:11.6.2-runtime-ubuntu20.04
- Add --session-name --session-tags to allow setting session Task name and tags
- Add list/info/shutdown commands
- Add --store-workspace allowing the remote session to store a specific folder
  and continue the next session with a fully restored folder
- Add --disable-session-cleanup to disable old session cleanup
---
 README.md                                   | 382 ++++++++++++--------
 clearml_session/__main__.py                 | 346 ++++++++++++++++--
 clearml_session/interactive_session_task.py | 205 ++++++++++-
 requirements.txt                            |   3 +-
 4 files changed, 733 insertions(+), 203 deletions(-)

diff --git a/README.md b/README.md
index cf88e77..305b0d6 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,9 @@
 
-## **`clearml-session` <br> CLI for launching JupyterLab / VSCode on a remote machine**
+## **`clearml-session` <br> CLI for launching JupyterLab / VSCode / SSH on a remote machine**
+
+## đŸ”Ĩ NEW in version `0.13` [Workspace Syncing](#store-and-synchronize-interactive-session-workspace) 🚀
 
 [![GitHub license](https://img.shields.io/github/license/allegroai/clearml-session.svg)](https://img.shields.io/github/license/allegroai/clearml-session.svg)
 
@@ -17,14 +19,20 @@
 **`clearml-session`** is a utility for launching detachable remote interactive sessions (MacOS, Windows, Linux)
 
 ### tl;dr
-CLI to launch remote sessions for JupyterLab / VSCode-server / SSH, inside any docker image!
+**CLI to launch a remote session of Jupyter-Lab / VSCode / SSH,
+inside any docker container on any deployment, Cloud / Kubernetes / Bare-Metal**
 
-### What does it do?
+### 🔰 What does it do?
 Starting a clearml (ob)session from your local machine triggers the following:
 - ClearML allocates a remote instance (GPU) from your dedicated pool
 - On the allocated instance it will spin **jupyter-lab** + **vscode server** + **SSH** access for
 interactive usage (i.e., development)
 - ClearML will start monitoring machine performance, allowing DevOps to detect stale instances and spin them down
+- NEW đŸ”Ĩ Kubernetes support, develop directly inside your pods! No kubectl required!
+Read more about `clearml-agent` and interactive sessions [here](https://clear.ml/docs/latest/docs/clearml_agent/#kubernetes)
+- NEW 🎉 Automatically store & sync your [interactive session workspace](#store-and-synchronize-interactive-session-workspace).
+`clearml-session` will automatically create a snapshot of your entire workspace when shutting it down,
+and later restore it into a new session on a different remote machine
 
 > ℹī¸ **Remote PyCharm:** You can also work with PyCharm in a remote session over SSH. Use the
 [PyCharm Plugin](https://github.com/allegroai/clearml-pycharm-plugin) to automatically sync local configurations with a remote session.
@@ -42,7 +50,7 @@
 
 Supported OS: MacOS, Windows, Linux
 
-## Secure & Stable
+## 🔒 Secure & Stable
 
 **clearml-session** creates a single, secure, and encrypted connection to the remote machine over SSH.
 SSH credentials are automatically generated by the CLI and contain fully random 32 bytes password.
@@ -58,7 +66,7 @@ the base SSH network tunnel, without breaking JupyterLab/VSCode-server or your o
 
 ---
 
-## How to use: Interactive Session
+## ⚡ How to use: Interactive Session
 
 1. run `clearml-session`
 
 pass **git credentials**, etc.
 See below for full CLI options.
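+
+For example, a first launch might look like this (the queue name and docker image below
+are illustrative values, substitute your own):
+
+```bash
+clearml-session --queue default --docker nvidia/cuda:11.6.2-runtime-ubuntu20.04 --packages "boto3>1"
+```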
 
-## Frequently Asked Questions:
-
-#### How Does ClearML enable this?
-
-The `clearml-session` creates a new interactive `Task` in the system (default project: DevOps).
-
-This `Task` is responsible for setting the SSH and JupyterLab/VSCode on the host machine.
-
-The local `clearml-session` awaits for the interactive Task to finish with the initial setup, then
-it connects via SSH to the host machine (see "safe and stable" above), and tunnels
-both SSH and JupyterLab over the SSH connection.
-
-The end results is a local link which you can use to access the JupyterLab/VSCode on the remote machine, over a **secure and encrypted** connection!
-
-#### How can this be used to scale up/out development resources?
-
-**Clearml** has a cloud autoscaler, so you can easily and automatically spin machines for development!
-
-There is also a default docker image to use when initiating a task.
-
-This means that using **clearml-session**s
-with the autoscaler enabled, allows for turn-key secure development environment inside a docker of your choosing.
-
-Learn more about it [here](https://clear.ml/docs/latest/docs/guides/services/aws_autoscaler)
-
-#### Does this fit Work From Home situations?
-**YES**. Install `clearml-agent` on target machines inside the organization, connect over your company VPN
-and use `clearml-session` to gain access to a dedicated on-prem machine with the docker of your choosing
-(with out-of-the-box support for any internal docker artifactory).
-
-Learn more about how to utilize your office workstations and on-prem machines [here](https://clear.ml/docs/latest/docs/clearml_agent)
-
-## Tutorials
+## 📖 Tutorials
 
 ### Getting started
 
@@ -144,10 +120,20 @@ Enter "r" (or "reconnect") to reconnect the session (for example after suspend)
 or "Shutdown" to shut down remote interactive session
 ```
 
-Click on the JupyterLab link (http://localhost:8878/?token=xyz)
+Click on the **Jupyter Lab** link (http://localhost:8878/?token=xyz)
+Or **VScode** (running inside your remote container) (http://localhost:8898/)
+Or drop into the **SSH** shell by typing `shell`
+
 Open your terminal, clone your code & start working :)
 
-### Leaving a session and reconnecting from the same machine
+> ℹī¸ **TIP**: You can add additional python packages to your remote session setup by adding `--packages` to the command line,
+> for example to add `boto3` add `--packages "boto3>1"`
+
+> ℹī¸ **TIP**: If you need direct SSH into the remote container from your terminal,
+> you can drop directly into a shell by adding `--shell` to the command line
+
+
+### Leaving a session and reconnecting to it
 
 On the `clearml-session` CLI terminal, enter 'quit' or press `Ctrl-C`
 It will close the CLI but preserve the remote session (i.e. remote session will remain running)
@@ -169,7 +155,7 @@ Connect to active session id=3d38e738c5ff458a9ec465e77e19da23 [Y]/n?
 On the `clearml-session` CLI terminal, enter 'shutdown' (case-insensitive)
 It will shut down the remote session, free the resource and close the CLI
 
-``` console
+```console
 Enter "r" (or "reconnect") to reconnect the session (for example after suspend)
 `s` (or "shell") to connect to the SSH session
 `Ctrl-C` (or "quit") to abort (remote session remains active)
 or "Shutdown" to shut down remote interactive session
@@ -182,178 +168,262 @@
 Remote session shutdown
 Goodbye
 ```
 
+You can also use the CLI to shut down a specific clearml interactive session
+
+```bash
+clearml-session shutdown --id <session_id>
+```
+
 ### Connecting to a running interactive session from a different machine
 
 Continue working on an interactive session from **any** machine.
 In the `clearml` web UI, go to DevOps project, and find your interactive session.
 Click on the ID button next to the Task name, and copy the unique ID.
 
-``` bash
-clearml-session --attach <session_id>
+```bash
+clearml-session --attach <session_id>
 ```
 
 Click on the JupyterLab/VSCode link, or connect directly to the SSH session
 
+> ✨ **TIP**: You can work & debug your colleague's code and workspace by sharing the `session id`
+> and connecting to the same remote container together with `--attach`
+
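+
+You can also locate the ID of a running session directly from the terminal with the new
+`list` and `info` commands (output will vary):
+
+```bash
+clearml-session list
+clearml-session info --id <session_id>
+```
+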
+
+### Store and synchronize interactive session workspace
+
+Specify the remote workspace root-folder by adding `--store-workspace ~/workspace` to the command line.
+In the remote session container, put all your code / data under the `~/workspace` directory.
+When your session is shut down, the workspace folder will be automatically packaged and stored on the clearml file server.
+In your next `clearml-session` execution, specify `--store-workspace ~/workspace` again, and clearml-session
+will grab the previous workspace snapshot and restore it into the new remote container in `~/workspace`.
+
+```bash
+clearml-session --store-workspace ~/workspace --docker python:3.10-bullseye
+```
+
+To continue the last aborted session and restore the workspace
+
+```bash
+clearml-session --store-workspace ~/workspace --docker python:3.10-bullseye
+```
+
+```console
+clearml-session - CLI for launching JupyterLab / VSCode / SSH on a remote machine
+Verifying credentials
+Use previous queue (resource) '1xGPU' [Y]/n?
+
+Interactive session config:
+...
+Restore workspace from session id=01bf86f038314434878b2413343ba746 'interactive_session' @ 2024-03-02 20:34:03 [Y]/n?
+Restoring workspace from previous session id=01bf86f038314434878b2413343ba746
+```
+
+To continue a **specific** session ID and restore its workspace
+
+```bash
+clearml-session --continue-session <session_id> --store-workspace ~/workspace --docker python:3.10-bullseye
+```
+
+### Upload local files to remote session
+
+If you need to upload files from your local machine into the remote session,
+specify the file or directory with `--upload-files /mnt/data/stuff`.
+The entire content of the directory / file will be copied into your remote `clearml-session`
+container under the `~/session-files/` directory.
+
+Can be used in conjunction with `--store-workspace` to easily move workloads between local development machine
+and remote machines with 100% persistent workspace synchronization.
+
+```bash
+clearml-session --upload-files /mnt/data/stuff
+```
+
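+For example, the following sketch combines the two flags to move a workload from a local
+machine to a remote session and back (paths are illustrative):
+
+```bash
+# upload local data and keep the remote workspace persistent
+clearml-session --upload-files /mnt/data/stuff --store-workspace ~/workspace
+# later, from any machine: restore the latest workspace snapshot
+clearml-session --store-workspace ~/workspace
+```
+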
 ### Debug a previously executed experiment
 
-If you have a previously executed experiment in the system,
-you can create an exact copy of the experiment and debug it on the remote interactive session.
+If you have a previously executed experiment (Task) on the clearml platform,
+you can create an exact copy of the experiment (Task) and debug it on the remote interactive session.
 `clearml-session` will replicate the exact remote environment, add JupyterLab/VSCode/SSH and allow you interactively
-execute and debug the experiment, on the allocated remote machine.
+execute and debug the experiment, on the interactive remote container.
 
 In the `clearml` web UI, find the experiment (Task) you wish to debug.
-Click on the ID button next to the Task name, and copy the unique ID.
+Click on the ID button next to the Task name, copy the unique ID, then execute:
 
-``` bash
+```bash
 clearml-session --debugging-session <task_id>
 ```
 
-Click on the JupyterLab/VSCode link, or connect directly to the SSH session
+Click on the JupyterLab/VSCode link, or drop directly into an SSH shell by typing `shell`
+
+
+## ❓ Frequently Asked Questions
+
+#### How does it work?
+
+The `clearml-session` creates a new interactive `Task` in the system (default project: DevOps).
+
+This `Task` is responsible for setting up the SSH and JupyterLab/VSCode on the host machine.
+
+The local `clearml-session` waits for the interactive Task to finish the initial setup, then
+it connects via SSH to the host machine (see "Secure & Stable" above), and tunnels
+both SSH and JupyterLab over the SSH connection. (A rough sketch of an equivalent manual
+tunnel appears at the end of this section.)
+
+The end result is a local link which you can use to access the JupyterLab/VSCode on the remote machine, over a **secure and encrypted** connection!
+
+#### Does `clearml-session` support Kubernetes clusters?
+
+Yes! `clearml-session` utilizes the `clearml-agent` kubernetes glue together with routing capabilities to allow
+any clearml-session to spin up a container (pod) on the kubernetes cluster and securely connect **directly** into the pod.
+This feature does not require any kubernetes access from the users, and it simplifies code
+development on kubernetes clusters as well as job scheduling & launching.
+Read more on how to deploy clearml on kubernetes [here](https://clear.ml/docs/latest/docs/clearml_agent/#kubernetes)
+
+#### How can I use `clearml-session` to scale up / out development resources?
+
+**Clearml** has a [cloud autoscaler](https://clear.ml/docs/latest/docs/cloud_autoscaling/autoscaling_overview), so you can easily and automatically spin up machines for development!
+
+There is also a default docker image to use when initiating a task.
+
+This means that using **clearml-session**s
+with the autoscaler enabled allows for a turn-key secure development environment inside a docker of your choosing.
+
+Learn more about it [here](https://clear.ml/docs/latest/docs/guides/services/aws_autoscaler) & [here](https://clear.ml/docs/latest/docs/webapp/applications/apps_gpu_compute)
+
+#### Does `clearml-session` fit a Work-From-Home setup?
+**YES**. Install `clearml-agent` on target machines inside the organization, connect over your company VPN
+and use `clearml-session` to gain access to a dedicated on-prem machine with the docker of your choosing
+(with out-of-the-box support for any internal docker artifactory).
+
+Learn more about how to utilize your office workstations and on-prem machines [here](https://clear.ml/docs/latest/docs/clearml_agent)
+
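+As mentioned in "How does it work?" above, all services are tunneled over a single SSH
+connection. The sketch below is purely illustrative (the host name and container-internal
+ports are assumptions; the real values are generated and managed by `clearml-session`):
+
+```bash
+# manual equivalent of the automated tunnel:
+# forward local JupyterLab (8878) and VSCode (8898) over the remote SSH server (port 10022)
+ssh root@<remote-host> -p 10022 -L 8878:127.0.0.1:8888 -L 8898:127.0.0.1:8081
+```
+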
 ## CLI options
 
-``` bash
+```bash
 clearml-session --help
 ```
 
-``` console
-clearml-session - CLI for launching JupyterLab / VSCode on a remote machine
-usage: clearml-session [-h] [--version] [--attach [ATTACH]]
-                       [--shutdown [SHUTDOWN]] [--shell]
-                       [--debugging-session DEBUGGING_SESSION] [--queue QUEUE]
-                       [--docker DOCKER] [--docker-args DOCKER_ARGS]
-                       [--public-ip [true/false]]
-                       [--remote-ssh-port REMOTE_SSH_PORT]
-                       [--vscode-server [true/false]]
-                       [--vscode-version VSCODE_VERSION]
-                       [--vscode-extensions VSCODE_EXTENSIONS]
-                       [--jupyter-lab [true/false]]
-                       [--upload-files UPLOAD_FILES]
-                       [--git-credentials [true/false]]
-                       [--user-folder USER_FOLDER]
-                       [--packages [PACKAGES [PACKAGES ...]]]
-                       [--requirements REQUIREMENTS]
-                       [--init-script [INIT_SCRIPT]]
-                       [--config-file CONFIG_FILE]
-                       [--remote-gateway [REMOTE_GATEWAY]]
-                       [--base-task-id BASE_TASK_ID] [--project PROJECT]
-                       [--keepalive [true/false]]
+```console
+clearml-session - CLI for launching JupyterLab / VSCode / SSH on a remote machine
+usage: clearml-session [-h] [--version] [--attach [ATTACH]] [--shutdown [SHUTDOWN]] [--shell]
+                       [--debugging-session DEBUGGING_SESSION] [--queue QUEUE] [--docker DOCKER]
+                       [--docker-args DOCKER_ARGS] [--public-ip [true/false]] [--remote-ssh-port REMOTE_SSH_PORT]
+                       [--vscode-server [true/false]] [--vscode-version VSCODE_VERSION]
+                       [--vscode-extensions VSCODE_EXTENSIONS] [--jupyter-lab [true/false]]
+                       [--upload-files UPLOAD_FILES] [--continue-session CONTINUE_SESSION]
+                       [--store-workspace STORE_WORKSPACE] [--git-credentials [true/false]]
+                       [--user-folder USER_FOLDER] [--packages [PACKAGES [PACKAGES ...]]]
+                       [--requirements REQUIREMENTS] [--init-script [INIT_SCRIPT]] [--config-file CONFIG_FILE]
+                       [--remote-gateway [REMOTE_GATEWAY]] [--base-task-id BASE_TASK_ID] [--project PROJECT]
+                       [--session-name SESSION_NAME] [--session-tags [SESSION_TAGS [SESSION_TAGS ...]]]
+                       [--disable-session-cleanup [true/false]] [--keepalive [true/false]]
                        [--queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]]
                        [--queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]]
-                       [--skip-docker-network [true/false]]
-                       [--password PASSWORD] [--username USERNAME]
-                       [--force_dropbear [true/false]] [--verbose] [--yes]
+                       [--skip-docker-network [true/false]] [--password PASSWORD] [--username USERNAME]
+                       [--force-dropbear [true/false]] [--verbose] [--yes]
+                       {list,info,shutdown} ...
 
-clearml-session - CLI for launching JupyterLab / VSCode on a remote machine
+clearml-session - CLI for launching JupyterLab / VSCode / SSH on a remote machine
+
+positional arguments:
+  {list,info,shutdown}  ClearML session control commands
+    list                List running Sessions
+    info                Detailed information on specific session
+    shutdown            Shutdown specific session
 
 optional arguments:
   -h, --help            show this help message and exit
   --version             Display the clearml-session utility version
-  --attach [ATTACH]     Attach to running interactive session (default:
-                        previous session)
+  --attach [ATTACH]     Attach to running interactive session (default: previous session)
   --shutdown [SHUTDOWN], -S [SHUTDOWN]
-                        Shut down an active session (default: previous
-                        session)
-  --shell               Open the SSH shell session directly, notice quiting
-                        the SSH session will Not shutdown the remote session
+                        Shut down an active session (default: previous session)
+  --shell               Open the SSH shell session directly, notice quitting the SSH session will not shut down
+                        the remote session
   --debugging-session DEBUGGING_SESSION
-                        Pass existing Task id (experiment), create a copy of
-                        the experiment on a remote machine, and launch
-                        jupyter/ssh for interactive access. Example
-                        --debugging-session <task_id>
-  --queue QUEUE         Select the queue to launch the interactive session on
-                        (default: previously used queue)
-  --docker DOCKER       Select the docker image to use in the interactive
-                        session on (default: previously used docker image or
-                        `nvidia/cuda:10.1-runtime-ubuntu18.04`)
+                        Pass existing Task id (experiment), create a copy of the experiment on a remote machine,
+                        and launch jupyter/ssh for interactive access. Example --debugging-session <task_id>
+  --queue QUEUE         Select the queue to launch the interactive session on (default: previously used queue)
+  --docker DOCKER       Select the docker image to use in the interactive session on (default: previously used
+                        docker image or `nvidia/cuda:11.6.2-runtime-ubuntu20.04`)
   --docker-args DOCKER_ARGS
-                        Add additional arguments for the docker image to use
-                        in the interactive session on (default: previously
-                        used docker-args)
+                        Add additional arguments for the docker image to use in the interactive session on
+                        (default: previously used docker-args)
   --public-ip [true/false]
-                        If True register the public IP of the remote machine.
-                        Set if running on the cloud. Default: false (use for
-                        local / on-premises)
+                        If True register the public IP of the remote machine. Set if running on the cloud.
+                        Default: false (use for local / on-premises)
   --remote-ssh-port REMOTE_SSH_PORT
-                        Set the remote ssh server port, running on the agent`s
-                        machine. (default: 10022)
+                        Set the remote ssh server port, running on the agent`s machine. (default: 10022)
   --vscode-server [true/false]
-                        Install vscode server (code-server) on interactive
-                        session (default: true)
+                        Install vscode server (code-server) on interactive session (default: true)
   --vscode-version VSCODE_VERSION
-                        Set vscode server (code-server) version, as well as
-                        vscode python extension version
-                        (example: "3.7.4:2020.10.332292344")
+                        Set vscode server (code-server) version, as well as vscode python extension version
+                        (example: "3.7.4:2020.10.332292344")
   --vscode-extensions VSCODE_EXTENSIONS
-                        Install additional vscode extensions, as well as
-                        vscode python extension (example: "ms-
-                        python.python,ms-python.black-formatter,ms-
-                        python.pylint,ms-python.flake8")
+                        Install additional vscode extensions, as well as vscode python extension (example: "ms-
+                        python.python,ms-python.black-formatter,ms-python.pylint,ms-python.flake8")
   --jupyter-lab [true/false]
-                        Install Jupyter-Lab on interactive session (default:
-                        true)
+                        Install Jupyter-Lab on interactive session (default: true)
   --upload-files UPLOAD_FILES
-                        Advanced: Upload local files/folders to the remote
-                        session. Example: `/my/local/data/` will upload the
-                        local folder and extract it into the container in
-                        ~/session-files/
+                        Advanced: Upload local files/folders to the remote session. Example: `/my/local/data/`
+                        will upload the local folder and extract it into the container in ~/session-files/
+  --continue-session CONTINUE_SESSION
+                        Continue previous session (ID provided) restoring your workspace (see --store-workspace)
+  --store-workspace STORE_WORKSPACE
+                        Upload/Restore remote workspace folder. Example: `~/workspace/` will automatically
+                        restore/store the *containers* folder and extract it into the next session. Use with
+                        --continue-session to continue your previous work from your exact container state
   --git-credentials [true/false]
-                        If true, local .git-credentials file is sent to the
-                        interactive session. (default: false)
+                        If true, local .git-credentials file is sent to the interactive session. (default: false)
   --user-folder USER_FOLDER
                         Advanced: Set the remote base folder (default: ~/)
   --packages [PACKAGES [PACKAGES ...]]
-                        Additional packages to add, supports version numbers
-                        (default: previously added packages). examples:
-                        --packages torch==1.7 tqdm
+                        Additional packages to add, supports version numbers (default: previously added packages).
+                        examples: --packages torch==1.7 tqdm
   --requirements REQUIREMENTS
-                        Specify requirements.txt file to install when setting
-                        the interactive session. Requirements file is read and
-                        stored in `packages` section as default for the next
-                        sessions. Can be overridden by calling `--packages`
+                        Specify requirements.txt file to install when setting the interactive session.
+                        Requirements file is read and stored in `packages` section as default for the next
+                        sessions. Can be overridden by calling `--packages`
   --init-script [INIT_SCRIPT]
-                        Specify BASH init script file to be executed when
-                        setting the interactive session. Script content is
-                        read and stored as default script for the next
-                        sessions. To clear the init-script do not pass a file
+                        Specify BASH init script file to be executed when setting the interactive session. Script
+                        content is read and stored as default script for the next sessions. To clear the init-
+                        script do not pass a file
   --config-file CONFIG_FILE
-                        Advanced: Change the configuration file used to store
-                        the previous state (default: ~/.clearml_session.json)
+                        Advanced: Change the configuration file used to store the previous state (default:
+                        ~/.clearml_session.json)
   --remote-gateway [REMOTE_GATEWAY]
-                        Advanced: Specify gateway ip/address:port to be passed
-                        to interactive session (for use with k8s ingestion /
-                        ELB)
+                        Advanced: Specify gateway ip/address:port to be passed to interactive session (for use
+                        with k8s ingestion / ELB)
   --base-task-id BASE_TASK_ID
-                        Advanced: Set the base task ID for the interactive
-                        session. (default: previously used Task). Use `none`
-                        for the default interactive session
-  --project PROJECT     Advanced: Set the project name for the interactive
-                        session Task
+                        Advanced: Set the base task ID for the interactive session. (default: previously used
+                        Task). Use `none` for the default interactive session
+  --project PROJECT     Advanced: Set the project name for the interactive session Task
+  --session-name SESSION_NAME
+                        Advanced: Set the name of the interactive session Task
+  --session-tags [SESSION_TAGS [SESSION_TAGS ...]]
+                        Advanced: Add tags to the interactive session for increased visibility
+  --disable-session-cleanup [true/false]
+                        Advanced: If set, previous interactive sessions are not deleted
   --keepalive [true/false]
-                        Advanced: If set, enables the transparent proxy always
-                        keeping the sockets alive. Default: False, do not use
-                        transparent socket for mitigating connection drops.
+                        Advanced: If set, enables the transparent proxy always keeping the sockets alive. Default:
+                        False, do not use transparent socket for mitigating connection drops.
   --queue-excluded-tag [QUEUE_EXCLUDED_TAG [QUEUE_EXCLUDED_TAG ...]]
-                        Advanced: Excluded queues with this specific tag from
-                        the selection
+                        Advanced: Excluded queues with this specific tag from the selection
   --queue-include-tag [QUEUE_INCLUDE_TAG [QUEUE_INCLUDE_TAG ...]]
-                        Advanced: Only include queues with this specific tag
-                        from the selection
+                        Advanced: Only include queues with this specific tag from the selection
   --skip-docker-network [true/false]
-                        Advanced: If set, `--network host` is **not** passed
-                        to docker (assumes k8s network ingestion) (default:
-                        false)
-  --password PASSWORD   Advanced: Select ssh password for the interactive
-                        session (default: `randomly-generated` or previously
+                        Advanced: If set, `--network host` is **not** passed to docker (assumes k8s network
+                        ingestion) (default: false)
+  --password PASSWORD   Advanced: Select ssh password for the interactive session (default: `randomly-generated`
+                        or previously used one)
+  --username USERNAME   Advanced: Select ssh username for the interactive session (default: `root` or previously
                         used one)
-  --username USERNAME   Advanced: Select ssh username for the interactive
-                        session (default: `root` or previously used one)
-  --force_dropbear [true/false]
+  --force-dropbear [true/false]
                         Force using `dropbear` instead of SSHd
-  --verbose             Advanced: If set, print verbose progress information,
-                        e.g. the remote machine setup process log
-  --yes, -y             Automatic yes to prompts; assume "yes" as answer to
-                        all prompts and run non-interactively
+  --verbose             Advanced: If set, print verbose progress information, e.g. the remote machine setup
+                        process log
+  --yes, -y             Automatic yes to prompts; assume "yes" as answer to all prompts and run non-interactively
 
-Notice! all arguments are stored as new defaults for the next session
+Notice! all arguments are stored as new defaults for the next execution
 ```
+
+
diff --git a/clearml_session/__main__.py b/clearml_session/__main__.py
index 167f578..f844327 100644
--- a/clearml_session/__main__.py
+++ b/clearml_session/__main__.py
@@ -35,7 +35,7 @@ except Exception:
     pass
 
 system_tag = 'interactive'
-default_docker_image = 'nvidia/cuda:10.1-runtime-ubuntu18.04'
+default_docker_image = 'nvidia/cuda:11.6.2-runtime-ubuntu20.04'
 
 
 class NonInteractiveError(Exception):
@@ -146,10 +146,21 @@ def _get_available_ports(list_initial_ports):
     return available_ports
 
 
-def create_base_task(state, project_name=None, task_name=None):
-    task = Task.create(project_name=project_name or 'DevOps',
-                       task_name=task_name or 'Interactive Session',
-                       task_type=Task.TaskTypes.application)
+def create_base_task(state, project_name=None, task_name=None, continue_task_id=None, project_id=None):
+    if continue_task_id:
+        task = Task.clone(
+            source_task=continue_task_id,
+            project=project_id,
+            parent=continue_task_id,
+            name=task_name or 'Interactive Session'
+        )
+    else:
+        task = Task.create(
+            project_name=project_name or 'DevOps',
+            task_name=task_name or 'Interactive Session',
+            task_type=Task.TaskTypes.application
+        )
+
     task_script = task.data.script.to_dict()
 
     base_script_file = os.path.abspath(os.path.join(__file__, '..', 'tcp_proxy.py'))
     with open(base_script_file, 'rt') as f:
@@ -218,13 +229,13 @@ def create_base_task(state, project_name=None, task_name=None):
     return task
 
 
-def create_debugging_task(state, debug_task_id):
+def create_debugging_task(state, debug_task_id, task_name=None, task_project_id=None):
     debug_task = Task.get_task(task_id=debug_task_id)
     # if there is no git repository, we cannot debug it
     if not debug_task.data.script.repository:
         raise ValueError("Debugging task has no git repository, single script debugging is not supported.")
 
-    task = Task.clone(source_task=debug_task_id, parent=debug_task_id)
+    task = Task.clone(source_task=debug_task_id, parent=debug_task_id, name=task_name, project=task_project_id)
 
     task_state = task.export_task()
 
@@ -285,23 +296,87 @@ def create_debugging_task(state, debug_task_id):
     return task
 
 
-def delete_old_tasks(state, client, base_task_id):
+def find_prev_session(state, client):
+    # nothing to do
+    if not state.get("store_workspace"):
+        return
+
+    current_user_id = _get_user_id(client)
+    previous_tasks = client.tasks.get_all(**{
+        'status': ['failed', 'stopped', 'completed'],
+        'system_tags': [system_tag],
+        'page_size': 100, 'page': 0,
+        'order_by': ['-last_update'],
+        'user': [current_user_id],
+        'only_fields': ['id']
+    })
+
+    continue_session_id = state.get("continue_session")
+    # if we do not find something, we ignore it
+    state["continue_session"] = None
+
+    for i, t in enumerate(previous_tasks):
+        try:
+            task = Task.get_task(task_id=t.id)
+            if state.get("store_workspace") and task.artifacts:
+                if continue_session_id and continue_session_id == t.id:
+                    print("Restoring workspace from previous session id={} [{}]".format(
+                        continue_session_id, task.data.last_update))
+                    state["continue_session"] = t.id
+                    break
+                elif not continue_session_id and i == 0:
+                    if not state.get("yes"):
".format( + t.id, task.name, str(task.data.last_update).split(".")[0])) + if str(choice).strip().lower() in ('n', 'no'): + continue + + print("Restoring workspace from previous session id={}".format(t.id)) + state["continue_session"] = t.id + break + except Exception as ex: + logging.getLogger().warning('Failed retrieving old session {}:'.format(t.id, ex)) + + +def delete_old_tasks(state, client, base_task_id, skip_latest_session=True): + if state["disable_session_cleanup"]: + return + print('Removing stale interactive sessions') + current_user_id = _get_user_id(client) previous_tasks = client.tasks.get_all(**{ 'status': ['failed', 'stopped', 'completed'], 'parent': base_task_id or None, 'system_tags': None if base_task_id else [system_tag], 'page_size': 100, 'page': 0, + 'order_by': ['-last_update'], 'user': [current_user_id], 'only_fields': ['id'] }) for i, t in enumerate(previous_tasks): + # skip the selected Task which has our new workspace + if state.get("continue_session") == t.id: + continue + if state.get('verbose'): print('Removing {}/{} stale sessions'.format(i+1, len(previous_tasks))) + # no need to worry about workspace snapshots, + # because they are input artifacts and thus will Not actually be deleted + # we will delete them manually if the Task has its own workspace snapshot try: - Task.get_task(task_id=t.id).delete(delete_artifacts_and_models=True, skip_models_used_by_other_tasks=True, raise_on_error=True) + task = Task.get_task(task_id=t.id) + # if we have any artifacts on this session Task + if skip_latest_session and task.artifacts and i == 0: + # do not delete this workspace yet (only next time) + continue + + task.delete( + delete_artifacts_and_models=True, + skip_models_used_by_other_tasks=True, + raise_on_error=True + ) except Exception as ex: logging.getLogger().warning('{}\nFailed deleting old session {}'.format(ex, t.id)) try: @@ -317,7 +392,8 @@ def _get_running_tasks(client, prev_task_id): 'system_tags': [system_tag], 'page_size': 10, 'page': 0, 'order_by': ['-last_update'], - 'user': [current_user_id], 'only_fields': ['id', 'created', 'parent'] + 'user': [current_user_id], + 'only_fields': ['id', 'created', 'parent'] }) tasks_id_created = [(t.id, t.created, t.parent) for t in previous_tasks] if prev_task_id and prev_task_id not in (t[0] for t in tasks_id_created): @@ -363,13 +439,9 @@ def _b64_encode_file(file): def get_project_id(project_name): project_id = None if project_name: - projects = Task.get_projects() - project_id = [p for p in projects if p.name == project_name] - if project_id: - project_id = project_id[0] - else: + project_id = Task.get_project_id(project_name=project_name) + if not project_id: logging.getLogger().warning("could not locate project by the named '{}'".format(project_name)) - project_id = None return project_id @@ -465,17 +537,19 @@ def get_user_inputs(args, parser, state, client): print("\nInteractive session config:\n{}\n".format( json.dumps({k: v for k, v in state.items() if not str(k).startswith('__')}, indent=4, sort_keys=True))) + return state + + +def ask_launch(args): # no need to ask just return the value - if assume_yes: - return state + if args.yes: + return choice = input('Launch interactive session [Y]/n? 
     choice = input('Launch interactive session [Y]/n? ')
     if str(choice).strip().lower() in ('n', 'no'):
         print('User aborted')
         exit(0)
 
-    return state
-
 
 def save_state(state, state_file):
     # if we are running in debugging mode,
@@ -504,25 +578,47 @@ def load_state(state_file):
     state.pop('yes', None)
     state.pop('shell', None)
     state.pop('upload_files', None)
+    state.pop('continue_session', None)
     return state
 
 
 def clone_task(state, project_id=None):
     new_task = False
+    project_id = \
+        project_id or (get_project_id(project_name=state.get('project')) if state.get('project') else None)
+
     if state.get('debugging_session'):
         print('Starting new debugging session to {}'.format(state.get('debugging_session')))
-        task = create_debugging_task(state, state.get('debugging_session'))
+        task = create_debugging_task(
+            state,
+            state.get('debugging_session'),
+            task_name=state.get('session_name'),
+            task_project_id=project_id
+        )
     elif state.get('base_task_id'):
-        print('Cloning base session {}'.format(state['base_task_id']))
-        project_id = \
-            project_id or (get_project_id(project_name=state.get('project')) if state.get('project') else None)
-        task = Task.clone(source_task=state['base_task_id'], project=project_id, parent=state['base_task_id'])
+        base_task_id = state.get('base_task_id')
+        print('Cloning base session {}'.format(base_task_id))
+        task = Task.clone(
+            source_task=base_task_id,
+            project=project_id,
+            parent=base_task_id,
+            name=state.get('session_name')
+        )
         task.set_system_tags([system_tag])
     else:
         print('Creating new session')
-        task = create_base_task(state, project_name=state.get('project'))
+        task = create_base_task(
+            state,
+            project_name=state.get('project'),
+            task_name=state.get('session_name'),
+            continue_task_id=state.get('continue_session'),
+            project_id=project_id
+        )
         new_task = True
 
+    if state.get("session_tags"):
+        task.set_tags(state.get("session_tags"))
+
     print('Configuring new session')
     runtime_prop_support = Session.check_min_api_version("2.13")
     if runtime_prop_support:
@@ -562,6 +658,7 @@ def clone_task(state, project_id=None):
         task_params["{}/vscode_version".format(section)] = state.get('vscode_version') or ''
         task_params["{}/vscode_extensions".format(section)] = state.get('vscode_extensions') or ''
         task_params["{}/force_dropbear".format(section)] = bool(state.get('force_dropbear'))
+        task_params["{}/store_workspace".format(section)] = state.get('store_workspace')
         if state.get('user_folder'):
             task_params['{}/user_base_directory'.format(section)] = state.get('user_folder')
         docker = state.get('docker') or task.get_base_docker()
@@ -624,6 +721,10 @@ def clone_task(state, project_id=None):
         task.update_task({'script': {'requirements': requirements}})
     task.set_parameters(task_params)
     print('New session created [id={}]'.format(task.id))
+    if state.get("continue_session") and state.get("store_workspace"):
+        print('Restoring remote workspace from [{}] into {}'.format(
+            state.get("continue_session"), state.get("store_workspace")))
+
     return task
 
 
@@ -819,6 +920,12 @@ def monitor_ssh_tunnel(state, task):
             local_vscode_port_ = local_vscode_port
 
     default_section = _get_config_section_name()[0]
+
+    workspace_header_msg = ''
+    if task.get_parameter("{}/store_workspace".format(default_section)):
+        workspace_header_msg = "Workspace at '{}' will be automatically synchronized when shutting down".format(
+            task.get_parameter("{}/store_workspace".format(default_section)))
+
     local_remote_pair_list = []
     shutdown = False
     try:
@@ -914,6 +1021,9 @@ def monitor_ssh_tunnel(state, task):
                         local_vscode_port=local_vscode_port)
                 if state.get('user_folder'):
"?folder={}".format(state.get('user_folder')) + if workspace_header_msg: + msg += "\n\n{}".format(workspace_header_msg) + print(msg) print(connect_message) else: @@ -986,6 +1096,114 @@ def monitor_ssh_tunnel(state, task): pass +class CliCommands: + state = dict() + + @classmethod + def list_sessions(cls, args): + client = APIClient() + filters = { + 'status': ['in_progress'], + 'system_tags': [system_tag], + 'page_size': 500, 'page': 0, + 'order_by': ['-last_update'], + 'only_fields': ['id', 'created', 'name', 'project', 'tags'] + } + if args.session_tags: + filters['tags'] = args.session_tags + if args.project: + filters['project'] = [Task.get_project_id(project_name=args.project)] + if not args.all_users: + current_user_id = _get_user_id(client) + filters['user'] = [current_user_id] + + msg = "Listing active sessions tags=[{}] project=[{}] all_users={}".format( + args.session_tags or "*", args.project or "*", args.all_users) + print(msg + "\n" + ("-" * len(msg))) + + session_tasks = client.tasks.get_all(**filters) + if not session_tasks: + print("No interactive sessions found") + return 0 + + project_names = dict() + for i, t in enumerate(session_tasks): + # noinspection PyProtectedMember + pname = project_names.get(t.project, Task._get_project_name(t.project)) if t.project else "" + print("{}] id={} name='{}' tags={} project='{}'".format(i, t.id, t.name, t.tags, pname)) + + return 0 + + @classmethod + def session_info(cls, args): + print("Fetching interactive session details:") + client = APIClient() + try: + tasks = client.tasks.get_all(**{ + 'id': [args.id], + 'page_size': 10, 'page': 0, + 'order_by': ['-last_update'], + 'only_fields': ['id', 'created', 'parent', 'status', 'project', 'tags', 'system_tags', 'type'] + }) + except APIError: + tasks = None + + if tasks: + tid = tasks[0].id + t = Task.get_task(task_id=tid) + print( + " status={}\n".format(t.data.status) + + " id={}\n".format(t.id) + + " name={}\n".format(t.name) + + " project={}\n".format(t.get_project_name()) + + " tags={}\n".format(t.get_tags()) + + " log={}\n".format(t.get_output_log_web_page()) + ) + return 0 + else: + print("ERROR: Interactive session id={} not found".format(args.id)) + return 1 + + @classmethod + def shutdown_session(cls, args): + task_id = args.id or args.shutdown + print("Shutting down session id={}".format(task_id)) + client = APIClient() + try: + tasks = client.tasks.get_all(**{ + 'id': [args.id], + 'page_size': 10, 'page': 0, + 'order_by': ['-last_update'], + 'only_fields': ['id', 'created', 'parent', 'status', 'project', 'tags', 'system_tags', 'type'] + }) + except APIError: + tasks = None + + if not tasks: + print("ERROR: Interactive session id={} not found".format(args.id)) + return 1 + + try: + task = _get_previous_session( + client, args, cls.state, + task_id=task_id, + verb="Shutting down", + question_verb="Shutdown", + ask_for_explicit_id=True + ) + except ValueError: + print("Warning: session not running - skipping shutdown") + return 0 + + if not task: + print("Warning: skipping session shutdown") + return 0 + + task.mark_stopped() + print("Session #{} shutdown".format(task.id)) + return 0 + + def setup_parser(parser): parser.add_argument('--version', action='store_true', default=None, help='Display the clearml-session utility version') @@ -1030,6 +1248,15 @@ def setup_parser(parser): help='Advanced: Upload local files/folders to the remote session. 
                        help='Advanced: Upload local files/folders to the remote session. '
                             'Example: `/my/local/data/` will upload the local folder and extract it '
                             'into the container in ~/session-files/')
+    parser.add_argument('--continue-session', type=str, default=None,
+                        help='Continue previous session (ID provided) '
+                             'restoring your workspace (see --store-workspace)')
+    parser.add_argument('--store-workspace', type=str, default=None,
+                        help='Upload/Restore remote workspace folder. '
+                             'Example: `~/workspace/` will automatically restore/store the *containers* folder '
+                             'and extract it into the next session. '
+                             'Use with --continue-session to continue your '
+                             'previous work from your exact container state')
     parser.add_argument('--git-credentials', default=False, nargs='?', const='true', metavar='true/false',
                         type=lambda x: (str(x).strip().lower() in ('true', 'yes')),
                         help='If true, local .git-credentials file is sent to the interactive session. '
@@ -1059,6 +1286,13 @@ def setup_parser(parser):
                             '(default: previously used Task). Use `none` for the default interactive session')
     parser.add_argument('--project', type=str, default=None,
                         help='Advanced: Set the project name for the interactive session Task')
+    parser.add_argument('--session-name', type=str, default=None,
+                        help='Advanced: Set the name of the interactive session Task')
+    parser.add_argument('--session-tags', type=str, nargs='*', default=None,
+                        help='Advanced: Add tags to the interactive session for increased visibility')
+    parser.add_argument('--disable-session-cleanup', default=False, nargs='?', const='true', metavar='true/false',
+                        type=lambda x: (str(x).strip().lower() in ('true', 'yes')),
+                        help='Advanced: If set, previous interactive sessions are not deleted')
     parser.add_argument('--keepalive', default=False, nargs='?', const='true', metavar='true/false',
                         type=lambda x: (str(x).strip().lower() in ('true', 'yes')),
                        help='Advanced: If set, enables the transparent proxy always keeping the sockets alive. '
' @@ -1067,7 +1301,7 @@ def setup_parser(parser): help='Advanced: Excluded queues with this specific tag from the selection') parser.add_argument('--queue-include-tag', default=None, nargs='*', help='Advanced: Only include queues with this specific tag from the selection') - parser.add_argument('--skip-docker-network', default=None, nargs='?', const='true', metavar='true/false', + parser.add_argument('--skip-docker-network', default=None, nargs='?', const='true', metavar='true/false', type=lambda x: (str(x).strip().lower() in ('true', 'yes')), help='Advanced: If set, `--network host` is **not** passed to docker ' '(assumes k8s network ingestion) (default: false)') @@ -1077,7 +1311,7 @@ def setup_parser(parser): parser.add_argument('--username', type=str, default=None, help='Advanced: Select ssh username for the interactive session ' '(default: `root` or previously used one)') - parser.add_argument('--force_dropbear', default=None, nargs='?', const='true', metavar='true/false', + parser.add_argument('--force-dropbear', default=None, nargs='?', const='true', metavar='true/false', type=lambda x: (str(x).strip().lower() in ('true', 'yes')), help='Force using `dropbear` instead of SSHd') parser.add_argument('--verbose', action='store_true', default=None, @@ -1088,6 +1322,26 @@ def setup_parser(parser): help='Automatic yes to prompts; assume \"yes\" as answer ' 'to all prompts and run non-interactively',) + subparsers = parser.add_subparsers(help='ClearML session control commands', dest='command') + + parser_list = subparsers.add_parser('list', help='List running Sessions') + parser_list.add_argument( + '--all_users', '-a', + action='store_true', default=False, + help='Return all running sessions (from all users). ' + 'Default: return Only current users sessions',) + parser_list.set_defaults(func=CliCommands.list_sessions) + + parser_info = subparsers.add_parser('info', help='Detailed information on specific session') + parser_info.add_argument( + '--id', type=str, default=None, help='Interactive session information details') + parser_info.set_defaults(func=CliCommands.session_info) + + parser_shutdown = subparsers.add_parser('shutdown', help='Shutdown specific session') + parser_shutdown.add_argument( + '--id', type=str, default=None, help='Session ID to be shutdown') + parser_shutdown.set_defaults(func=CliCommands.shutdown_session) + def get_version(): from .version import __version__ @@ -1095,11 +1349,11 @@ def get_version(): def cli(): - title = 'clearml-session - CLI for launching JupyterLab / VSCode on a remote machine' + title = 'clearml-session - CLI for launching JupyterLab / VSCode / SSH on a remote machine' print(title) parser = ArgumentParser( prog='clearml-session', description=title, - epilog='Notice! all arguments are stored as new defaults for the next session') + epilog='Notice! all arguments are stored as new defaults for the next execution') setup_parser(parser) # get the args @@ -1109,14 +1363,6 @@ def cli(): print('Version {}'.format(get_version())) exit(0) - # check ssh - if not _check_ssh_executable(): - raise ValueError("Could not locate SSH executable") - - # check clearml.conf - if not _check_configuration(): - raise ValueError("ClearML configuration not found. 
Please run `clearml-init`") - # load previous state state_file = os.path.abspath(os.path.expandvars(os.path.expanduser(args.config_file))) state = load_state(state_file) @@ -1126,8 +1372,26 @@ def cli(): state['shell'] = bool(args.shell) + if args.command: + if args.command in ("info", "shutdown") and not args.id: + print("Notice! session info requires ID but it was not provided") + return + + CliCommands.state = state + args.func(args) + return + + # check ssh + if not _check_ssh_executable(): + raise ValueError("Could not locate SSH executable") + + # check clearml.conf + if not _check_configuration(): + raise ValueError("ClearML configuration not found. Please run `clearml-init`") + client = APIClient() + # to be deprecated if args.shutdown is not None: task = _get_previous_session( client, args, state, task_id=args.shutdown, verb="Shutting down", @@ -1169,6 +1433,12 @@ def cli(): # save state save_state(state, state_file) + # find previous workspace is needed + find_prev_session(state, client) + + # ask user final approval + ask_launch(args) + # remove old Tasks created by us. delete_old_tasks(state, client, state.get('base_task_id')) diff --git a/clearml_session/interactive_session_task.py b/clearml_session/interactive_session_task.py index 0a3819c..27c0b00 100644 --- a/clearml_session/interactive_session_task.py +++ b/clearml_session/interactive_session_task.py @@ -1,18 +1,22 @@ import base64 import json import os +import shutil import socket import subprocess import sys from copy import deepcopy import getpass -from tempfile import mkstemp, gettempdir -from time import sleep +from functools import partial +from tempfile import mkstemp, gettempdir, mkdtemp +from time import sleep, time +from datetime import datetime import psutil import requests from clearml import Task, StorageManager from clearml.backend_api import Session +from clearml.backend_api.services import tasks from pathlib2 import Path # noinspection SpellCheckingInspection @@ -75,8 +79,10 @@ default_ssh_fingerprint = { config_section_name = 'interactive_session' config_object_section_ssh = 'SSH' config_object_section_bash_init = 'interactive_init_script' - - +artifact_workspace_name = "workspace" +sync_runtime_property = "workspace_sync_ts" +sync_workspace_creating_id = "created_by_session" +__poor_lock = [] __allocated_ports = [] @@ -94,7 +100,10 @@ def init_task(param, a_default_ssh_fingerprint): Task.add_requirements('jupyterlab') Task.add_requirements('jupyterlab_git') task = Task.init( - project_name="DevOps", task_name="Allocate Jupyter Notebook Instance", task_type=Task.TaskTypes.service) + project_name="DevOps", + task_name="Allocate Jupyter Notebook Instance", + task_type=Task.TaskTypes.service + ) # Add jupyter server base folder if Session.check_min_api_version('2.13'): @@ -116,7 +125,7 @@ def init_task(param, a_default_ssh_fingerprint): else: task.connect(param, name=config_section_name) - # connect ssh finger print configuration (with fallback if section is missing) + # connect ssh fingerprint configuration (with fallback if section is missing) old_default_ssh_fingerprint = deepcopy(a_default_ssh_fingerprint) try: task.connect_configuration(configuration=a_default_ssh_fingerprint, name=config_object_section_ssh) @@ -476,6 +485,13 @@ def start_jupyter_server(hostname, hostnames, param, task, env, bind_ip="127.0.0 env = dict(**env) env['PATH'] = '{}:{}'.format(Path(sys.executable).parent.as_posix(), env.get('PATH', '')) + try: + # set default shell to bash if not defined + if not env.get("SHELL") and 
shutil.which("bash"): + env['SHELL'] = shutil.which("bash") + except Exception as ex: + print("WARNING: failed finding default shell bash: {}".format(ex)) + # make sure we have the needed cwd # noinspection PyBroadException try: @@ -999,6 +1015,169 @@ def run_user_init_script(task): os.environ['CLEARML_DOCKER_BASH_SCRIPT'] = str(init_script) +def _sync_workspace_snapshot(task, param): + workspace_folder = param.get("store_workspace") + if not workspace_folder: + # nothing to do + return + + print("Syncing workspace {}".format(workspace_folder)) + + workspace_folder = Path(os.path.expandvars(workspace_folder)).expanduser() + if not workspace_folder.is_dir(): + print("WARNING: failed to create workspace snapshot from '{}' - " + "directory does not exist".format(workspace_folder)) + return + + # build hash of + files_desc = "" + for f in workspace_folder.rglob("*"): + fs = f.stat() + files_desc += "{}: {}[{}]\n".format(f.absolute(), fs.st_size, fs.st_mtime) + workspace_hash = hash(str(files_desc)) + if param.get("workspace_hash") == workspace_hash: + print("Skipping workspace snapshot upload, " + "already uploaded no files changed since last sync {}".format(param.get(sync_runtime_property))) + return + + print("Uploading workspace: {}".format(workspace_folder)) + + # force running status - so that we can upload the artifact + if task.status not in ("in_progress", ): + task.mark_started(force=True) + + try: + # create a tar file of the folder + # put a consistent file name into a temp folder because the filename is part of + # the compressed artifact, and we want consistency in hash. + # After that we rename compressed file to temp file and + temp_folder = Path(mkdtemp(prefix='workspace_')) + local_gzip = (temp_folder / "workspace_snapshot").as_posix() + # notice it will add a ".tar.gz" suffix to the file + local_gzip = shutil.make_archive( + base_name=local_gzip, format="gztar", root_dir=workspace_folder.as_posix()) + if not local_gzip: + print("ERROR: Failed compressing workspace [{}]".format(workspace_folder)) + raise ValueError("Failed compressing workspace") + + # list archived files for preview + files = list(workspace_folder.rglob("*")) + archive_preview = 'Archive content {}:\n'.format(workspace_folder) + for filename in sorted(files): + if filename.is_file(): + relative_file_name = filename.relative_to(workspace_folder) + archive_preview += '{} - {:,} B\n'.format(relative_file_name, filename.stat().st_size) + + # upload actual snapshot tgz + task.upload_artifact( + name=artifact_workspace_name, + artifact_object=Path(local_gzip), + delete_after_upload=True, + preview=archive_preview, + metadata={"timestamp": str(datetime.utcnow()), sync_workspace_creating_id: task.id}, + wait_on_upload=True, + retries=3 + ) + + try: + temp_folder.rmdir() + except Exception as ex: + print("Warning: Failed removing temp artifact folder: {}".format(ex)) + + print("Finalizing workspace sync") + + # change artifact to input artifact + task.reload() + # find our artifact and update it + for a in task.data.execution.artifacts: + if a.key != artifact_workspace_name: + # nothing to do + continue + elif a.mode == tasks.ArtifactModeEnum.input: + # the old input entry - we are changing to output artifact + # the reason is that we want this entry to be deleted with this Task + # in contrast to Input entries that are Not deleted when deleting the Task + a.mode = tasks.ArtifactModeEnum.output + a.key = "old_" + str(a.key) + else: + # set the new entry as an input artifact + a.mode = tasks.ArtifactModeEnum.input + + 
+        # noinspection PyProtectedMember
+        task._edit(execution=task.data.execution, force=True)
+        task.reload()
+
+        # update our timestamp & hash
+        param[sync_runtime_property] = time()
+        param["workspace_hash"] = workspace_hash
+        # noinspection PyProtectedMember
+        task._set_runtime_properties(runtime_properties={sync_runtime_property: time()})
+        print("[{}] Workspace '{}' snapshot synced".format(datetime.utcnow(), workspace_folder))
+    except Exception as ex:
+        print("ERROR: Failed syncing workspace [{}]: {}".format(workspace_folder, ex))
+    finally:
+        task.mark_stopped(force=True, status_message="workspace shutdown sync completed")
+
+
+def sync_workspace_snapshot(task, param):
+    __poor_lock.append(time())
+    if len(__poor_lock) != 1:
+        # someone is already in, we should leave
+        __poor_lock.pop(-1)
+        return
+
+    try:
+        return _sync_workspace_snapshot(task, param)
+    finally:
+        __poor_lock.pop(-1)
+
+
+def restore_workspace(task, param):
+    if not param.get("store_workspace"):
+        # check if we have something to restore, show warning
+        if artifact_workspace_name in task.artifacts:
+            print("WARNING: Found workspace snapshot, but ignoring since store_workspace is 'None'")
+        return
+
+    # add sync callback, timeout 5 min
+    print("Setting workspace snapshot sync callback on session end")
+    task.register_abort_callback(
+        partial(sync_workspace_snapshot, task, param),
+        callback_execution_timeout=60*5)
+
+    try:
+        workspace_folder = Path(os.path.expandvars(param.get("store_workspace"))).expanduser()
+        workspace_folder.mkdir(parents=True, exist_ok=True)
+    except Exception as ex:
+        print("ERROR: Could not create workspace folder {}: {}".format(
+            param.get("store_workspace"), ex))
+        return
+
+    if artifact_workspace_name not in task.artifacts:
+        print("No workspace snapshot was found, a new workspace snapshot [{}] "
+              "will be created when session ends".format(workspace_folder))
+        return
+
+    print("Fetching previous workspace snapshot")
+    artifact_zip_file = task.artifacts[artifact_workspace_name].get_local_copy(extract_archive=False)
+    print("Restoring workspace snapshot")
+    try:
+        shutil.unpack_archive(artifact_zip_file, extract_dir=workspace_folder.as_posix())
+    except Exception as ex:
+        print("ERROR: restoring workspace snapshot failed: {}".format(ex))
+        return
+
+    # remove the workspace from the cache
+    try:
+        os.unlink(artifact_zip_file)
+    except Exception as ex:
+        print("WARNING: temp workspace zip could not be removed: {}".format(ex))
+
+    print("Successfully restored workspace checkpoint to {}".format(workspace_folder))
+    # set time stamp
+    # noinspection PyProtectedMember
+    task._set_runtime_properties(runtime_properties={sync_runtime_property: time()})
+
+
 def main():
     param = {
         "user_base_directory": "~/",
@@ -1014,11 +1193,19 @@ def main():
         "public_ip": False,
         "ssh_ports": None,
         "force_dropbear": False,
+        "store_workspace": None,
     }
     task = init_task(param, default_ssh_fingerprint)
 
     run_user_init_script(task)
 
+    # restore workspace if one exists
+    # notice, if "store_workspace" is not set we will Not restore the workspace
+    try:
+        restore_workspace(task, param)
+    except Exception as ex:
+        print("ERROR: Failed restoring workspace: {}".format(ex))
+
     hostname, hostnames = get_host_name(task, param)
 
     env = setup_user_env(param, task)
@@ -1029,7 +1216,11 @@ def main():
 
     start_jupyter_server(hostname, hostnames, param, task, env)
 
-    print('We are done')
+    print('We are done - sync workspace if needed')
+
+    sync_workspace_snapshot(task, param)
+
+    print('Goodbye')
 
 
 if __name__ == '__main__':
diff --git a/requirements.txt b/requirements.txt
index 071b61e..16ae438 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-clearml >= 1.1.5
+clearml >= 1.9
 pexpect ; sys_platform != 'win32'
 wexpect_venv ; sys_platform == 'win32'
-pillow>=10.0.1 # not directly required, pinned by Snyk to avoid a vulnerability