Upgrade vscode_version to '4.96.2' and python_ext_version to '2024.22.1'

Upgrade Dropbear to 2024.86
Fix session shutdown: request a task abort instead of marking the task stopped, to allow easier artifact uploading
This commit is contained in:
clearml 2025-01-05 12:17:40 +02:00
parent 610bdba72f
commit e0a79f7ce7
2 changed files with 59 additions and 14 deletions

View File

@ -24,6 +24,7 @@ else:
import psutil
from clearml import Task
from clearml.backend_api.session.client import APIClient, APIError
from clearml.backend_api.services import tasks
from clearml.config import config_obj
from clearml.backend_api import Session
from .tcp_proxy import TcpProxy
@ -149,6 +150,27 @@ def _get_available_ports(list_initial_ports):
return available_ports
def request_task_abort(task, force=False, status_message=None):
    """
    Send an abort (stop) request for `task`, falling back to marking it stopped.

    A `tasks.StopRequest` is sent through `task.send()` with `ignore_errors=True`.
    If a response object comes back and its `ok()` check fails, the task is
    marked stopped directly via `task.mark_stopped()`.

    :param task: task object providing `id`, `send()` and `mark_stopped()`
    :param force: passed only to the fallback `task.mark_stopped()` call;
        the stop request itself is always sent with `force=False`
    :param status_message: status message attached to the stop request and to the fallback
    :return: True when the fallback `mark_stopped()` path was taken,
        otherwise whatever `task.send()` returned (possibly a falsy value)
    """
    stop_request = tasks.StopRequest(
        task.id,
        force=False,
        status_reason="abort request",
        status_message=status_message,
    )
    response = task.send(stop_request, ignore_errors=True)

    # no response object at all, or the response reports success:
    # hand back the send() result untouched
    if not response or response.ok():
        return response

    # we got a response and it reports failure - mark the task stopped ourselves
    print(f"INFO: failed sending abort request, forcefully stopping task {task.id}")
    task.mark_stopped(
        force=force,
        status_message=status_message,
        status_reason="abort request failed, setting forcefully",
    )
    return True
def create_base_task(state, project_name=None, task_name=None, continue_task_id=None, project_id=None):
if continue_task_id:
task = Task.clone(
@ -1178,7 +1200,7 @@ def monitor_ssh_tunnel(state, task, ssh_setup_completed_callback=None):
continue
elif user_input.lower() == 'shutdown':
print('Shutting down interactive session')
task.mark_stopped()
request_task_abort(task)
shutdown = True
break
elif user_input.lower() in ('r', 'reconnect', ):
@ -1313,7 +1335,7 @@ class CliCommands:
print("Warning: skipping session shutdown")
return 0
task.mark_stopped()
request_task_abort(task)
print("Session #{} shutdown".format(task.id))
return 0
@ -1517,7 +1539,7 @@ def cli():
if not task:
print("No session to shut down, exiting")
return 1
task.mark_stopped()
request_task_abort(task)
print("Session #{} shut down, goodbye!".format(task.id))
return 0

View File

@ -17,7 +17,7 @@ import requests
from clearml import Task, StorageManager
from clearml.backend_api import Session
from clearml.backend_api.services import tasks
from pathlib2 import Path
from pathlib import Path
# noinspection SpellCheckingInspection
default_ssh_fingerprint = {
@ -281,8 +281,8 @@ def start_vscode_server(hostname, hostnames, param, task, env, bind_ip="127.0.0.
# get vscode version and python extension version
# they are extremely flaky, this combination works, most do not.
vscode_version = '4.14.1'
python_ext_version = '2023.12.0'
vscode_version = '4.96.2'
python_ext_version = '2024.22.1'
if param.get("vscode_version"):
vscode_version_parts = param.get("vscode_version").split(':')
vscode_version = vscode_version_parts[0]
@ -294,9 +294,13 @@ def start_vscode_server(hostname, hostnames, param, task, env, bind_ip="127.0.0.
env.pop('PYTHONPATH', None)
# example of CLEARML_SESSION_VSCODE_PY_EXT value
# 'https://github.com/microsoft/vscode-python/releases/download/{}/ms-python-release.vsix'
# 'https://marketplace.visualstudio.com/_apis/public/gallery/publishers/ms-python/vsextensions/python/2022.12.0/vspackage'
# (see https://marketplace.visualstudio.com/items?itemName=ms-python.python).
python_ext_download_link = os.environ.get("CLEARML_SESSION_VSCODE_PY_EXT")
# example of CLEARML_SESSION_VSCODE_SERVER_DEB value
# 'https://github.com/coder/code-server/releases/download/v4.96.2/code-server_4.96.2_amd64.deb'
# (see https://github.com/coder/code-server/releases)
code_server_deb_download_link = \
os.environ.get("CLEARML_SESSION_VSCODE_SERVER_DEB") or \
'https://github.com/coder/code-server/releases/download/v{version}/code-server_{version}_amd64.deb'
@ -433,6 +437,7 @@ def start_vscode_server(hostname, hostnames, param, task, env, bind_ip="127.0.0.
"security.workspace.trust.untrustedFiles": "open",
# "security.workspace.trust.startupPrompt": "never",
"security.workspace.trust.enabled": False,
"telemetry.telemetryLevel": "off",
})
with open(settings.as_posix(), 'wt') as f:
json.dump(base_json, f)
@ -659,7 +664,7 @@ def setup_ssh_server(hostname, hostnames, param, task, env):
print('WARNING: SSHd was not found defaulting to user-space dropbear sshd server')
dropbear_download_link = \
os.environ.get("CLEARML_DROPBEAR_EXEC") or \
'https://github.com/allegroai/dropbear/releases/download/DROPBEAR_CLEARML_2023.02/dropbearmulti'
'https://github.com/allegroai/dropbear/releases/download/DROPBEAR_CLEARML_2024.86/dropbearmulti'
dropbear = StorageManager.get_local_copy(dropbear_download_link, extract_archive=False)
os.chmod(dropbear, 0o744)
sshd_path = dropbear
@ -1106,7 +1111,8 @@ def _sync_workspace_snapshot(task, param):
print("Uploading workspace: {}".format(workspace_folder))
# force running status - so that we can upload the artifact
if task.status not in ("in_progress", ):
prev_status = task.status
if prev_status not in ("in_progress", ):
task.mark_started(force=True)
try:
@ -1179,7 +1185,12 @@ def _sync_workspace_snapshot(task, param):
except Exception as ex:
print("ERROR: Failed syncing workspace [{}]: {}".format(workspace_folder, ex))
finally:
task.mark_stopped(force=True, status_message="workspace shutdown sync completed")
if prev_status in ("failed", ):
task.mark_failed(force=True, status_message="workspace shutdown sync completed")
elif prev_status in ("completed", ):
task.mark_completed(force=True, status_message="workspace shutdown sync completed")
else:
task.mark_stopped(force=True, status_message="workspace shutdown sync completed")
def sync_workspace_snapshot(task, param):
@ -1199,7 +1210,7 @@ def restore_workspace(task, param):
# check if we have something to restore, show warning
if artifact_workspace_name in task.artifacts:
print("WARNING: Found workspace snapshot, but ignoring since store_workspace is 'None'")
return
return None
# add sync callback, timeout 5 min
print("Setting workspace snapshot sync callback on session end")
@ -1213,21 +1224,25 @@ def restore_workspace(task, param):
except Exception as ex:
print("ERROR: Could not create workspace folder {}: {}".format(
param.get("store_workspace"), ex))
return
return None
if artifact_workspace_name not in task.artifacts:
print("No workspace snapshot was found, a new workspace snapshot [{}] "
"will be created when session ends".format(workspace_folder))
return
return None
print("Fetching previous workspace snapshot")
artifact_zip_file = task.artifacts[artifact_workspace_name].get_local_copy(extract_archive=False)
if not artifact_zip_file:
print("Error: Fetching previous workspace snapshot Failed! skipping workspace restore")
return None
print("Restoring workspace snapshot")
try:
shutil.unpack_archive(artifact_zip_file, extract_dir=workspace_folder.as_posix())
except Exception as ex:
print("ERROR: restoring workspace snapshot failed: {}".format(ex))
return
return None
# remove the workspace from the cache
try:
@ -1239,6 +1254,7 @@ def restore_workspace(task, param):
# set time stamp
# noinspection PyProtectedMember
task._set_runtime_properties(runtime_properties={sync_runtime_property: time()})
return workspace_folder
def main():
@ -1270,6 +1286,10 @@ def main():
except Exception as ex:
print("ERROR: Failed restoring workspace: {}".format(ex))
# make the new user base folder the workspace directory
if (param["store_workspace"] or "").strip():
param["user_base_directory"] = param["store_workspace"]
hostname, hostnames = get_host_name(task, param)
env = setup_user_env(param, task)
@ -1284,6 +1304,9 @@ def main():
sync_workspace_snapshot(task, param)
# sync back python packages for next time
# TODO: sync python environment
print('Goodbye')