Fix support for jupyterlab v3 and add venv agent support

This commit is contained in:
allegroai 2021-01-10 18:29:55 +02:00
parent c8f798275e
commit b30e7b6a03

View File

@ -3,6 +3,8 @@ import os
import socket import socket
import subprocess import subprocess
import sys import sys
from time import sleep
import requests import requests
from copy import deepcopy from copy import deepcopy
from tempfile import mkstemp from tempfile import mkstemp
@ -81,7 +83,7 @@ __allocated_ports = []
def get_free_port(range_min, range_max): def get_free_port(range_min, range_max):
global __allocated_ports global __allocated_ports
used_ports = [i.laddr.port for i in psutil.net_connections()] used_ports = [i.laddr.port for i in psutil.net_connections()]
port = [i for i in range(range_min, range_max) if i not in used_ports and i not in __allocated_ports][0] port = next(i for i in range(range_min, range_max) if i not in used_ports and i not in __allocated_ports)
__allocated_ports.append(port) __allocated_ports.append(port)
return port return port
@ -172,7 +174,7 @@ def monitor_jupyter_server(fd, local_filename, process, task, jupyter_port, host
for line in new_lines: for line in new_lines:
if "http://" not in line and "https://" not in line: if "http://" not in line and "https://" not in line:
continue continue
parts = line.split('/?token=', 1) parts = line.split('?token=', 1)
if len(parts) != 2: if len(parts) != 2:
continue continue
token = parts[1] token = parts[1]
@ -222,6 +224,7 @@ def start_vscode_server(hostname, hostnames, param, task, env):
# find a free tcp port # find a free tcp port
port = get_free_port(9000, 9100) port = get_free_port(9000, 9100)
if os.geteuid() == 0:
# installing VSCODE: # installing VSCODE:
try: try:
python_ext = StorageManager.get_local_copy( python_ext = StorageManager.get_local_copy(
@ -234,6 +237,18 @@ def start_vscode_server(hostname, hostnames, param, task, env):
except Exception as ex: except Exception as ex:
print("Failed installing vscode server: {}".format(ex)) print("Failed installing vscode server: {}".format(ex))
return return
vscode_path = 'code-server'
else:
python_ext = None
# check if code-server exists
# noinspection PyBroadException
try:
vscode_path = subprocess.check_output('which code-server', shell=True).decode().strip()
assert vscode_path
except Exception:
print('Error: Cannot install code-server (not root) and could not find code-server executable, skipping.')
task.set_parameter(name='properties/vscode_port', value=str(-1))
return
cwd = ( cwd = (
os.path.expandvars(os.path.expanduser(param["user_base_directory"])) os.path.expandvars(os.path.expanduser(param["user_base_directory"]))
@ -255,17 +270,16 @@ def start_vscode_server(hostname, hostnames, param, task, env):
fd, local_filename = mkstemp() fd, local_filename = mkstemp()
subprocess.Popen( subprocess.Popen(
[ [
"code-server", vscode_path,
"--auth", "--auth",
"none", "none",
"--bind-addr", "--bind-addr",
"127.0.0.1:{}".format(port), "127.0.0.1:{}".format(port),
"--user-data-dir", user_folder, "--user-data-dir", user_folder,
"--extensions-dir", exts_folder, "--extensions-dir", exts_folder,
"--install-extension", python_ext,
"--install-extension", "ms-toolsai.jupyter", "--install-extension", "ms-toolsai.jupyter",
# "--install-extension", "donjayamanne.python-extension-pack" # "--install-extension", "donjayamanne.python-extension-pack"
], ] + ["--install-extension", python_ext] if python_ext else [],
env=env, env=env,
stdout=fd, stdout=fd,
stderr=fd, stderr=fd,
@ -292,8 +306,8 @@ def start_vscode_server(hostname, hostnames, param, task, env):
pass pass
proc = subprocess.Popen( proc = subprocess.Popen(
['bash', '-c', ['bash', '-c',
'code-server --auth none --bind-addr 127.0.0.1:{} --disable-update-check ' '{} --auth none --bind-addr 127.0.0.1:{} --disable-update-check '
'--user-data-dir {} --extensions-dir {}'.format(port, user_folder, exts_folder)], '--user-data-dir {} --extensions-dir {}'.format(vscode_path, port, user_folder, exts_folder)],
env=env, env=env,
stdout=fd, stdout=fd,
stderr=fd, stderr=fd,
@ -313,6 +327,12 @@ def start_vscode_server(hostname, hostnames, param, task, env):
def start_jupyter_server(hostname, hostnames, param, task, env): def start_jupyter_server(hostname, hostnames, param, task, env):
if not param.get('jupyterlab', True):
print('no jupyterlab to monitor - going to sleep')
while True:
sleep(10.)
return
# execute jupyter notebook # execute jupyter notebook
fd, local_filename = mkstemp() fd, local_filename = mkstemp()
cwd = ( cwd = (
@ -324,6 +344,10 @@ def start_jupyter_server(hostname, hostnames, param, task, env):
# find a free tcp port # find a free tcp port
port = get_free_port(8888, 9000) port = get_free_port(8888, 9000)
# if we are not running as root, make sure the sys executable is in the PATH
env = dict(**env)
env['PATH'] = '{}:{}'.format(Path(sys.executable).parent.as_posix(), env.get('PATH', ''))
# make sure we have the needed cwd # make sure we have the needed cwd
# noinspection PyBroadException # noinspection PyBroadException
try: try:
@ -342,7 +366,7 @@ def start_jupyter_server(hostname, hostnames, param, task, env):
"--no-browser", "--no-browser",
"--allow-root", "--allow-root",
"--ip", "--ip",
"0.0.0.0", "127.0.0.1",
"--port", "--port",
str(port), str(port),
], ],
@ -365,6 +389,8 @@ def setup_ssh_server(hostname, hostnames, param, task):
port = get_free_port(10022, 15000) port = get_free_port(10022, 15000)
proxy_port = get_free_port(10022, 15000) proxy_port = get_free_port(10022, 15000)
# if we are root, install open-ssh
if os.geteuid() == 0:
# noinspection SpellCheckingInspection # noinspection SpellCheckingInspection
os.system( os.system(
"export PYTHONPATH=\"\" && " "export PYTHONPATH=\"\" && "
@ -388,11 +414,44 @@ def setup_ssh_server(hostname, hostnames, param, task):
trains_config_file=os.environ.get("CLEARML_CONFIG_FILE") or os.environ.get("TRAINS_CONFIG_FILE"), trains_config_file=os.environ.get("CLEARML_CONFIG_FILE") or os.environ.get("TRAINS_CONFIG_FILE"),
) )
) )
sshd_path = '/usr/sbin/sshd'
ssh_config_path = '/etc/ssh/'
custom_ssh_conf = None
else:
# check if sshd exists
# noinspection PyBroadException
try:
sshd_path = subprocess.check_output('which sshd', shell=True).decode().strip()
ssh_config_path = os.path.join(os.getcwd(), '.clearml_session_sshd')
Path(ssh_config_path).mkdir(parents=True, exist_ok=True)
custom_ssh_conf = os.path.join(ssh_config_path, 'sshd_config')
with open(custom_ssh_conf, 'wt') as f:
conf = \
"PermitRootLogin yes" + "\n"\
"ClientAliveInterval 10" + "\n"\
"ClientAliveCountMax 20" + "\n"\
"AllowTcpForwarding yes" + "\n"\
"UsePAM yes" + "\n"\
"AuthorizedKeysFile {}".format(os.path.join(ssh_config_path, 'authorized_keys')) + "\n"\
"PidFile {}".format(os.path.join(ssh_config_path, 'sshd.pid')) + "\n"\
"AcceptEnv TRAINS_API_ACCESS_KEY TRAINS_API_SECRET_KEY "\
"CLEARML_API_ACCESS_KEY CLEARML_API_SECRET_KEY"+"\n"
for k in default_ssh_fingerprint:
filename = os.path.join(ssh_config_path, '{}'.format(k.replace('__pub', '.pub')))
conf += "HostKey {}\n".format(filename)
f.write(conf)
except Exception:
print('Error: Cannot install sshd (not root) and could not find sshd executable, leaving!')
return
# clear the ssh password, we cannot change it
ssh_password = None
task.set_parameter('{}/ssh_password'.format(config_section_name), '')
# create fingerprint files # create fingerprint files
Path('/etc/ssh/').mkdir(parents=True, exist_ok=True) Path(ssh_config_path).mkdir(parents=True, exist_ok=True)
for k, v in default_ssh_fingerprint.items(): for k, v in default_ssh_fingerprint.items():
filename = '/etc/ssh/{}'.format(k.replace('__pub', '.pub')) filename = os.path.join(ssh_config_path, '{}'.format(k.replace('__pub', '.pub')))
try: try:
os.unlink(filename) os.unlink(filename)
except Exception: # noqa except Exception: # noqa
@ -400,12 +459,20 @@ def setup_ssh_server(hostname, hostnames, param, task):
if v: if v:
with open(filename, 'wt') as f: with open(filename, 'wt') as f:
f.write(v + (' root@{}'.format(hostname) if filename.endswith('.pub') else '')) f.write(v + (' root@{}'.format(hostname) if filename.endswith('.pub') else ''))
os.chmod(filename, 0o644 if filename.endswith('.pub') else 0o600) os.chmod(filename, 0o600 if filename.endswith('.pub') else 0o600)
# run server # run server in foreground so it gets killed with us
result = os.system("/usr/sbin/sshd -p {port}".format(port=port)) proc_args = [sshd_path, "-D", "-p", str(port)] + (["-f", custom_ssh_conf] if custom_ssh_conf else [])
proc = subprocess.Popen(args=proc_args)
# noinspection PyBroadException
try:
result = proc.wait(timeout=1)
except Exception:
result = 0
if result != 0:
raise ValueError("Failed launching sshd: ", proc_args)
if result == 0:
# noinspection PyBroadException # noinspection PyBroadException
try: try:
TcpProxy(listen_port=proxy_port, target_port=port, proxy_state={}, verbose=False, # noqa TcpProxy(listen_port=proxy_port, target_port=port, proxy_state={}, verbose=False, # noqa
@ -424,10 +491,9 @@ def setup_ssh_server(hostname, hostnames, param, task):
hostname, hostnames, port, ssh_password hostname, hostnames, port, ssh_password
) )
) )
else:
raise ValueError()
except Exception as ex: except Exception as ex:
print("{}\n\n#\n# Error: SSH server could not be launched\n#\n".format(ex)) print("Error: {}\n\n#\n# Error: SSH server could not be launched\n#\n".format(ex))
def setup_user_env(param, task): def setup_user_env(param, task):
@ -593,6 +659,7 @@ def main():
"user_key": None, "user_key": None,
"user_secret": None, "user_secret": None,
"vscode_server": True, "vscode_server": True,
"jupyterlab": True,
"public_ip": False, "public_ip": False,
} }
task = init_task(param, default_ssh_fingerprint) task = init_task(param, default_ssh_fingerprint)