mirror of
https://github.com/clearml/clearml-session
synced 2025-06-11 17:01:22 +00:00
Upgrade vscode_version to '4.96.2' and python_ext_version to '2024.22.1'
Upgrade Dropbear to 2024.86. Fix: request a task abort instead of marking the task stopped, to allow easier artifact uploading
This commit is contained in:
parent
610bdba72f
commit
e0a79f7ce7
@ -24,6 +24,7 @@ else:
|
|||||||
import psutil
|
import psutil
|
||||||
from clearml import Task
|
from clearml import Task
|
||||||
from clearml.backend_api.session.client import APIClient, APIError
|
from clearml.backend_api.session.client import APIClient, APIError
|
||||||
|
from clearml.backend_api.services import tasks
|
||||||
from clearml.config import config_obj
|
from clearml.config import config_obj
|
||||||
from clearml.backend_api import Session
|
from clearml.backend_api import Session
|
||||||
from .tcp_proxy import TcpProxy
|
from .tcp_proxy import TcpProxy
|
||||||
@ -149,6 +150,27 @@ def _get_available_ports(list_initial_ports):
|
|||||||
return available_ports
|
return available_ports
|
||||||
|
|
||||||
|
|
||||||
|
def request_task_abort(task, force=False, status_message=None):
    """
    Send an abort (stop) request for `task`, and mark the task as stopped
    directly if the request comes back with a not-ok response.

    :param task: Task object; its `id`, `send()` and `mark_stopped()` are used
    :param force: passed only to the fallback `mark_stopped()` call,
        the abort request itself is always sent with force=False
    :param status_message: optional status message, attached both to the
        abort request and to the fallback `mark_stopped()` call
    :return: True if the fallback `mark_stopped()` was used, otherwise
        whatever `task.send()` returned (possibly a falsy value)
    """
    abort_request = tasks.StopRequest(
        task.id,
        force=False,
        status_reason="abort request",
        status_message=status_message,
    )
    response = task.send(abort_request, ignore_errors=True)

    # no response object, or the response reports ok: return it to the caller untouched
    if not response or response.ok():
        return response

    # the abort request did not report ok, set the stopped status ourselves
    print(f"INFO: failed sending abort request, forcefully stopping task {task.id}")
    task.mark_stopped(
        force=force,
        status_message=status_message,
        status_reason="abort request failed, setting forcefully",
    )
    return True
|
||||||
|
|
||||||
|
|
||||||
def create_base_task(state, project_name=None, task_name=None, continue_task_id=None, project_id=None):
|
def create_base_task(state, project_name=None, task_name=None, continue_task_id=None, project_id=None):
|
||||||
if continue_task_id:
|
if continue_task_id:
|
||||||
task = Task.clone(
|
task = Task.clone(
|
||||||
@ -1178,7 +1200,7 @@ def monitor_ssh_tunnel(state, task, ssh_setup_completed_callback=None):
|
|||||||
continue
|
continue
|
||||||
elif user_input.lower() == 'shutdown':
|
elif user_input.lower() == 'shutdown':
|
||||||
print('Shutting down interactive session')
|
print('Shutting down interactive session')
|
||||||
task.mark_stopped()
|
request_task_abort(task)
|
||||||
shutdown = True
|
shutdown = True
|
||||||
break
|
break
|
||||||
elif user_input.lower() in ('r', 'reconnect', ):
|
elif user_input.lower() in ('r', 'reconnect', ):
|
||||||
@ -1313,7 +1335,7 @@ class CliCommands:
|
|||||||
print("Warning: skipping session shutdown")
|
print("Warning: skipping session shutdown")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
task.mark_stopped()
|
request_task_abort(task)
|
||||||
print("Session #{} shutdown".format(task.id))
|
print("Session #{} shutdown".format(task.id))
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
@ -1517,7 +1539,7 @@ def cli():
|
|||||||
if not task:
|
if not task:
|
||||||
print("No session to shut down, exiting")
|
print("No session to shut down, exiting")
|
||||||
return 1
|
return 1
|
||||||
task.mark_stopped()
|
request_task_abort(task)
|
||||||
print("Session #{} shut down, goodbye!".format(task.id))
|
print("Session #{} shut down, goodbye!".format(task.id))
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
@ -17,7 +17,7 @@ import requests
|
|||||||
from clearml import Task, StorageManager
|
from clearml import Task, StorageManager
|
||||||
from clearml.backend_api import Session
|
from clearml.backend_api import Session
|
||||||
from clearml.backend_api.services import tasks
|
from clearml.backend_api.services import tasks
|
||||||
from pathlib2 import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# noinspection SpellCheckingInspection
|
# noinspection SpellCheckingInspection
|
||||||
default_ssh_fingerprint = {
|
default_ssh_fingerprint = {
|
||||||
@ -281,8 +281,8 @@ def start_vscode_server(hostname, hostnames, param, task, env, bind_ip="127.0.0.
|
|||||||
|
|
||||||
# get vscode version and python extension version
|
# get vscode version and python extension version
|
||||||
# they are extremely flaky, this combination works, most do not.
|
# they are extremely flaky, this combination works, most do not.
|
||||||
vscode_version = '4.14.1'
|
vscode_version = '4.96.2'
|
||||||
python_ext_version = '2023.12.0'
|
python_ext_version = '2024.22.1'
|
||||||
if param.get("vscode_version"):
|
if param.get("vscode_version"):
|
||||||
vscode_version_parts = param.get("vscode_version").split(':')
|
vscode_version_parts = param.get("vscode_version").split(':')
|
||||||
vscode_version = vscode_version_parts[0]
|
vscode_version = vscode_version_parts[0]
|
||||||
@ -294,9 +294,13 @@ def start_vscode_server(hostname, hostnames, param, task, env, bind_ip="127.0.0.
|
|||||||
env.pop('PYTHONPATH', None)
|
env.pop('PYTHONPATH', None)
|
||||||
|
|
||||||
# example of CLEARML_SESSION_VSCODE_PY_EXT value
|
# example of CLEARML_SESSION_VSCODE_PY_EXT value
|
||||||
# 'https://github.com/microsoft/vscode-python/releases/download/{}/ms-python-release.vsix'
|
# 'https://marketplace.visualstudio.com/_apis/public/gallery/publishers/ms-python/vsextensions/python/2022.12.0/vspackage'
|
||||||
|
# (see https://marketplace.visualstudio.com/items?itemName=ms-python.python).
|
||||||
python_ext_download_link = os.environ.get("CLEARML_SESSION_VSCODE_PY_EXT")
|
python_ext_download_link = os.environ.get("CLEARML_SESSION_VSCODE_PY_EXT")
|
||||||
|
|
||||||
|
# example of CLEARML_SESSION_VSCODE_SERVER_DEB value
|
||||||
|
# 'https://github.com/coder/code-server/releases/download/v4.96.2/code-server_4.96.2_amd64.deb'
|
||||||
|
# (see https://github.com/coder/code-server/releases)
|
||||||
code_server_deb_download_link = \
|
code_server_deb_download_link = \
|
||||||
os.environ.get("CLEARML_SESSION_VSCODE_SERVER_DEB") or \
|
os.environ.get("CLEARML_SESSION_VSCODE_SERVER_DEB") or \
|
||||||
'https://github.com/coder/code-server/releases/download/v{version}/code-server_{version}_amd64.deb'
|
'https://github.com/coder/code-server/releases/download/v{version}/code-server_{version}_amd64.deb'
|
||||||
@ -433,6 +437,7 @@ def start_vscode_server(hostname, hostnames, param, task, env, bind_ip="127.0.0.
|
|||||||
"security.workspace.trust.untrustedFiles": "open",
|
"security.workspace.trust.untrustedFiles": "open",
|
||||||
# "security.workspace.trust.startupPrompt": "never",
|
# "security.workspace.trust.startupPrompt": "never",
|
||||||
"security.workspace.trust.enabled": False,
|
"security.workspace.trust.enabled": False,
|
||||||
|
"telemetry.telemetryLevel": "off",
|
||||||
})
|
})
|
||||||
with open(settings.as_posix(), 'wt') as f:
|
with open(settings.as_posix(), 'wt') as f:
|
||||||
json.dump(base_json, f)
|
json.dump(base_json, f)
|
||||||
@ -659,7 +664,7 @@ def setup_ssh_server(hostname, hostnames, param, task, env):
|
|||||||
print('WARNING: SSHd was not found defaulting to user-space dropbear sshd server')
|
print('WARNING: SSHd was not found defaulting to user-space dropbear sshd server')
|
||||||
dropbear_download_link = \
|
dropbear_download_link = \
|
||||||
os.environ.get("CLEARML_DROPBEAR_EXEC") or \
|
os.environ.get("CLEARML_DROPBEAR_EXEC") or \
|
||||||
'https://github.com/allegroai/dropbear/releases/download/DROPBEAR_CLEARML_2023.02/dropbearmulti'
|
'https://github.com/allegroai/dropbear/releases/download/DROPBEAR_CLEARML_2024.86/dropbearmulti'
|
||||||
dropbear = StorageManager.get_local_copy(dropbear_download_link, extract_archive=False)
|
dropbear = StorageManager.get_local_copy(dropbear_download_link, extract_archive=False)
|
||||||
os.chmod(dropbear, 0o744)
|
os.chmod(dropbear, 0o744)
|
||||||
sshd_path = dropbear
|
sshd_path = dropbear
|
||||||
@ -1106,7 +1111,8 @@ def _sync_workspace_snapshot(task, param):
|
|||||||
print("Uploading workspace: {}".format(workspace_folder))
|
print("Uploading workspace: {}".format(workspace_folder))
|
||||||
|
|
||||||
# force running status - so that we can upload the artifact
|
# force running status - so that we can upload the artifact
|
||||||
if task.status not in ("in_progress", ):
|
prev_status = task.status
|
||||||
|
if prev_status not in ("in_progress", ):
|
||||||
task.mark_started(force=True)
|
task.mark_started(force=True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@ -1179,7 +1185,12 @@ def _sync_workspace_snapshot(task, param):
|
|||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print("ERROR: Failed syncing workspace [{}]: {}".format(workspace_folder, ex))
|
print("ERROR: Failed syncing workspace [{}]: {}".format(workspace_folder, ex))
|
||||||
finally:
|
finally:
|
||||||
task.mark_stopped(force=True, status_message="workspace shutdown sync completed")
|
if prev_status in ("failed", ):
|
||||||
|
task.mark_failed(force=True, status_message="workspace shutdown sync completed")
|
||||||
|
elif prev_status in ("completed", ):
|
||||||
|
task.mark_completed(force=True, status_message="workspace shutdown sync completed")
|
||||||
|
else:
|
||||||
|
task.mark_stopped(force=True, status_message="workspace shutdown sync completed")
|
||||||
|
|
||||||
|
|
||||||
def sync_workspace_snapshot(task, param):
|
def sync_workspace_snapshot(task, param):
|
||||||
@ -1199,7 +1210,7 @@ def restore_workspace(task, param):
|
|||||||
# check if we have something to restore, show warning
|
# check if we have something to restore, show warning
|
||||||
if artifact_workspace_name in task.artifacts:
|
if artifact_workspace_name in task.artifacts:
|
||||||
print("WARNING: Found workspace snapshot, but ignoring since store_workspace is 'None'")
|
print("WARNING: Found workspace snapshot, but ignoring since store_workspace is 'None'")
|
||||||
return
|
return None
|
||||||
|
|
||||||
# add sync callback, timeout 5 min
|
# add sync callback, timeout 5 min
|
||||||
print("Setting workspace snapshot sync callback on session end")
|
print("Setting workspace snapshot sync callback on session end")
|
||||||
@ -1213,21 +1224,25 @@ def restore_workspace(task, param):
|
|||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print("ERROR: Could not create workspace folder {}: {}".format(
|
print("ERROR: Could not create workspace folder {}: {}".format(
|
||||||
param.get("store_workspace"), ex))
|
param.get("store_workspace"), ex))
|
||||||
return
|
return None
|
||||||
|
|
||||||
if artifact_workspace_name not in task.artifacts:
|
if artifact_workspace_name not in task.artifacts:
|
||||||
print("No workspace snapshot was found, a new workspace snapshot [{}] "
|
print("No workspace snapshot was found, a new workspace snapshot [{}] "
|
||||||
"will be created when session ends".format(workspace_folder))
|
"will be created when session ends".format(workspace_folder))
|
||||||
return
|
return None
|
||||||
|
|
||||||
print("Fetching previous workspace snapshot")
|
print("Fetching previous workspace snapshot")
|
||||||
artifact_zip_file = task.artifacts[artifact_workspace_name].get_local_copy(extract_archive=False)
|
artifact_zip_file = task.artifacts[artifact_workspace_name].get_local_copy(extract_archive=False)
|
||||||
|
if not artifact_zip_file:
|
||||||
|
print("Error: Fetching previous workspace snapshot Failed! skipping workspace restore")
|
||||||
|
return None
|
||||||
|
|
||||||
print("Restoring workspace snapshot")
|
print("Restoring workspace snapshot")
|
||||||
try:
|
try:
|
||||||
shutil.unpack_archive(artifact_zip_file, extract_dir=workspace_folder.as_posix())
|
shutil.unpack_archive(artifact_zip_file, extract_dir=workspace_folder.as_posix())
|
||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print("ERROR: restoring workspace snapshot failed: {}".format(ex))
|
print("ERROR: restoring workspace snapshot failed: {}".format(ex))
|
||||||
return
|
return None
|
||||||
|
|
||||||
# remove the workspace from the cache
|
# remove the workspace from the cache
|
||||||
try:
|
try:
|
||||||
@ -1239,6 +1254,7 @@ def restore_workspace(task, param):
|
|||||||
# set time stamp
|
# set time stamp
|
||||||
# noinspection PyProtectedMember
|
# noinspection PyProtectedMember
|
||||||
task._set_runtime_properties(runtime_properties={sync_runtime_property: time()})
|
task._set_runtime_properties(runtime_properties={sync_runtime_property: time()})
|
||||||
|
return workspace_folder
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@ -1270,6 +1286,10 @@ def main():
|
|||||||
except Exception as ex:
|
except Exception as ex:
|
||||||
print("ERROR: Failed restoring workspace: {}".format(ex))
|
print("ERROR: Failed restoring workspace: {}".format(ex))
|
||||||
|
|
||||||
|
# make the new user base folder the workspace directory
|
||||||
|
if (param["store_workspace"] or "").strip():
|
||||||
|
param["user_base_directory"] = param["store_workspace"]
|
||||||
|
|
||||||
hostname, hostnames = get_host_name(task, param)
|
hostname, hostnames = get_host_name(task, param)
|
||||||
|
|
||||||
env = setup_user_env(param, task)
|
env = setup_user_env(param, task)
|
||||||
@ -1284,6 +1304,9 @@ def main():
|
|||||||
|
|
||||||
sync_workspace_snapshot(task, param)
|
sync_workspace_snapshot(task, param)
|
||||||
|
|
||||||
|
# sync back python packages for next time
|
||||||
|
# TODO: sync python environment
|
||||||
|
|
||||||
print('Goodbye')
|
print('Goodbye')
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user