From e0a79f7ce7512a87e81f4d81c6acd52c19b100a2 Mon Sep 17 00:00:00 2001 From: clearml <> Date: Sun, 5 Jan 2025 12:17:40 +0200 Subject: [PATCH] =?UTF-8?q?Upgrade=C2=A0vscode=5Fversion=20to=20'4.96.2'?= =?UTF-8?q?=20and=20python=5Fext=5Fversion=20to=20'2024.22.1'=20Upgrade=20?= =?UTF-8?q?Dropbear=20to=202024.86=20Fix=20mark=20task=20stopped,=20not=20?= =?UTF-8?q?stopping=20it=20to=20allow=20easier=20artifact=20uploading?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- clearml_session/__main__.py | 28 +++++++++++-- clearml_session/interactive_session_task.py | 45 ++++++++++++++++----- 2 files changed, 59 insertions(+), 14 deletions(-) diff --git a/clearml_session/__main__.py b/clearml_session/__main__.py index c09568b..2c6bb1d 100644 --- a/clearml_session/__main__.py +++ b/clearml_session/__main__.py @@ -24,6 +24,7 @@ else: import psutil from clearml import Task from clearml.backend_api.session.client import APIClient, APIError +from clearml.backend_api.services import tasks from clearml.config import config_obj from clearml.backend_api import Session from .tcp_proxy import TcpProxy @@ -149,6 +150,27 @@ def _get_available_ports(list_initial_ports): return available_ports +def request_task_abort(task, force=False, status_message=None): + res = task.send( + tasks.StopRequest( + task.id, force=False, + status_reason="abort request", + status_message=status_message), + ignore_errors=True + ) + # if we failed to request, mark it stopped + if res and not res.ok(): + print(f"INFO: failed sending abort request, forcefully stopping task {task.id}") + task.mark_stopped( + force=force, + status_message=status_message, + status_reason="abort request failed, setting forcefully" + ) + return True + + return res + + def create_base_task(state, project_name=None, task_name=None, continue_task_id=None, project_id=None): if continue_task_id: task = Task.clone( @@ -1178,7 +1200,7 @@ def monitor_ssh_tunnel(state, task, ssh_setup_completed_callback=None): continue elif user_input.lower() == 'shutdown': print('Shutting down interactive session') - task.mark_stopped() + request_task_abort(task) shutdown = True break elif user_input.lower() in ('r', 'reconnect', ): @@ -1313,7 +1335,7 @@ class CliCommands: print("Warning: skipping session shutdown") return 0 - task.mark_stopped() + request_task_abort(task) print("Session #{} shutdown".format(task.id)) return 0 @@ -1517,7 +1539,7 @@ def cli(): if not task: print("No session to shut down, exiting") return 1 - task.mark_stopped() + request_task_abort(task) print("Session #{} shut down, goodbye!".format(task.id)) return 0 diff --git a/clearml_session/interactive_session_task.py b/clearml_session/interactive_session_task.py index 91253ed..624c09a 100644 --- a/clearml_session/interactive_session_task.py +++ b/clearml_session/interactive_session_task.py @@ -17,7 +17,7 @@ import requests from clearml import Task, StorageManager from clearml.backend_api import Session from clearml.backend_api.services import tasks -from pathlib2 import Path +from pathlib import Path # noinspection SpellCheckingInspection default_ssh_fingerprint = { @@ -281,8 +281,8 @@ def start_vscode_server(hostname, hostnames, param, task, env, bind_ip="127.0.0. # get vscode version and python extension version # they are extremely flaky, this combination works, most do not. - vscode_version = '4.14.1' - python_ext_version = '2023.12.0' + vscode_version = '4.96.2' + python_ext_version = '2024.22.1' if param.get("vscode_version"): vscode_version_parts = param.get("vscode_version").split(':') vscode_version = vscode_version_parts[0] @@ -294,9 +294,13 @@ def start_vscode_server(hostname, hostnames, param, task, env, bind_ip="127.0.0. env.pop('PYTHONPATH', None) # example of CLEARML_SESSION_VSCODE_PY_EXT value - # 'https://github.com/microsoft/vscode-python/releases/download/{}/ms-python-release.vsix' + # 'https://marketplace.visualstudio.com/_apis/public/gallery/publishers/ms-python/vsextensions/python/2022.12.0/vspackage' + # (see https://marketplace.visualstudio.com/items?itemName=ms-python.python). python_ext_download_link = os.environ.get("CLEARML_SESSION_VSCODE_PY_EXT") + # example of CLEARML_SESSION_VSCODE_SERVER_DEB value + # 'https://github.com/coder/code-server/releases/download/v4.96.2/code-server_4.96.2_amd64.deb' + # (see https://github.com/coder/code-server/releases) code_server_deb_download_link = \ os.environ.get("CLEARML_SESSION_VSCODE_SERVER_DEB") or \ 'https://github.com/coder/code-server/releases/download/v{version}/code-server_{version}_amd64.deb' @@ -433,6 +437,7 @@ def start_vscode_server(hostname, hostnames, param, task, env, bind_ip="127.0.0. "security.workspace.trust.untrustedFiles": "open", # "security.workspace.trust.startupPrompt": "never", "security.workspace.trust.enabled": False, + "telemetry.telemetryLevel": "off", }) with open(settings.as_posix(), 'wt') as f: json.dump(base_json, f) @@ -659,7 +664,7 @@ def setup_ssh_server(hostname, hostnames, param, task, env): print('WARNING: SSHd was not found defaulting to user-space dropbear sshd server') dropbear_download_link = \ os.environ.get("CLEARML_DROPBEAR_EXEC") or \ - 'https://github.com/allegroai/dropbear/releases/download/DROPBEAR_CLEARML_2023.02/dropbearmulti' + 'https://github.com/allegroai/dropbear/releases/download/DROPBEAR_CLEARML_2024.86/dropbearmulti' dropbear = StorageManager.get_local_copy(dropbear_download_link, extract_archive=False) os.chmod(dropbear, 0o744) sshd_path = dropbear @@ -1106,7 +1111,8 @@ def _sync_workspace_snapshot(task, param): print("Uploading workspace: {}".format(workspace_folder)) # force running status - so that we can upload the artifact - if task.status not in ("in_progress", ): + prev_status = task.status + if prev_status not in ("in_progress", ): task.mark_started(force=True) try: @@ -1179,7 +1185,12 @@ def _sync_workspace_snapshot(task, param): except Exception as ex: print("ERROR: Failed syncing workspace [{}]: {}".format(workspace_folder, ex)) finally: - task.mark_stopped(force=True, status_message="workspace shutdown sync completed") + if prev_status in ("failed", ): + task.mark_failed(force=True, status_message="workspace shutdown sync completed") + elif prev_status in ("completed", ): + task.mark_completed(force=True, status_message="workspace shutdown sync completed") + else: + task.mark_stopped(force=True, status_message="workspace shutdown sync completed") def sync_workspace_snapshot(task, param): @@ -1199,7 +1210,7 @@ def restore_workspace(task, param): # check if we have something to restore, show warning if artifact_workspace_name in task.artifacts: print("WARNING: Found workspace snapshot, but ignoring since store_workspace is 'None'") - return + return None # add sync callback, timeout 5 min print("Setting workspace snapshot sync callback on session end") @@ -1213,21 +1224,25 @@ def restore_workspace(task, param): except Exception as ex: print("ERROR: Could not create workspace folder {}: {}".format( param.get("store_workspace"), ex)) - return + return None if artifact_workspace_name not in task.artifacts: print("No workspace snapshot was found, a new workspace snapshot [{}] " "will be created when session ends".format(workspace_folder)) - return + return None print("Fetching previous workspace snapshot") artifact_zip_file = task.artifacts[artifact_workspace_name].get_local_copy(extract_archive=False) + if not artifact_zip_file: + print("Error: Fetching previous workspace snapshot Failed! skipping workspace restore") + return None + print("Restoring workspace snapshot") try: shutil.unpack_archive(artifact_zip_file, extract_dir=workspace_folder.as_posix()) except Exception as ex: print("ERROR: restoring workspace snapshot failed: {}".format(ex)) - return + return None # remove the workspace from the cache try: @@ -1239,6 +1254,7 @@ def restore_workspace(task, param): # set time stamp # noinspection PyProtectedMember task._set_runtime_properties(runtime_properties={sync_runtime_property: time()}) + return workspace_folder def main(): @@ -1270,6 +1286,10 @@ def main(): except Exception as ex: print("ERROR: Failed restoring workspace: {}".format(ex)) + # make the new user base folder the workspace directory + if (param["store_workspace"] or "").strip(): + param["user_base_directory"] = param["store_workspace"] + hostname, hostnames = get_host_name(task, param) env = setup_user_env(param, task) @@ -1284,6 +1304,9 @@ def main(): sync_workspace_snapshot(task, param) + # sync back python packages for next time + # TODO: sync python environment + print('Goodbye')