mirror of
https://github.com/clearml/clearml-agent
synced 2025-01-31 09:06:52 +00:00
Send task.ping
for executing tasks every 120 seconds (set using the agent.task_ping_interval_sec
configuration option)
This commit is contained in:
parent
396abf13b6
commit
57cde21c48
@ -14,6 +14,14 @@ except ImportError:
|
|||||||
ConverterType = TypeVar("ConverterType", bound=Callable[[Any], Any])
|
ConverterType = TypeVar("ConverterType", bound=Callable[[Any], Any])
|
||||||
|
|
||||||
|
|
||||||
|
def text_to_int(value, default=0):
    # type: (Any, int) -> int
    """
    Convert a value to an integer, falling back to a default when conversion fails.

    :param value: The value to convert; passed as-is to ``int()``
    :param default: The value returned when ``value`` cannot be converted
    :return: ``int(value)``, or ``default`` if the conversion raises
        ValueError, TypeError or OverflowError
    """
    try:
        return int(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is raised by int() for infinite floats; treat it like any
        # other non-convertible value instead of letting it propagate to the caller
        return default
|
||||||
|
|
||||||
|
|
||||||
def base64_to_text(value):
|
def base64_to_text(value):
|
||||||
# type: (Any) -> Text
|
# type: (Any) -> Text
|
||||||
return base64.b64decode(value).decode("utf-8")
|
return base64.b64decode(value).decode("utf-8")
|
||||||
|
@ -41,6 +41,7 @@ from clearml_agent.backend_api.session.defs import (
|
|||||||
ENV_VENV_CONFIGURED, ENV_PROPAGATE_EXITCODE, )
|
ENV_VENV_CONFIGURED, ENV_PROPAGATE_EXITCODE, )
|
||||||
from clearml_agent.backend_config.defs import UptimeConf
|
from clearml_agent.backend_config.defs import UptimeConf
|
||||||
from clearml_agent.backend_config.utils import apply_environment, apply_files
|
from clearml_agent.backend_config.utils import apply_environment, apply_files
|
||||||
|
from clearml_agent.backend_config.converters import text_to_int
|
||||||
from clearml_agent.commands.base import resolve_names, ServiceCommandSection
|
from clearml_agent.commands.base import resolve_names, ServiceCommandSection
|
||||||
from clearml_agent.commands.resolver import resolve_default_container
|
from clearml_agent.commands.resolver import resolve_default_container
|
||||||
from clearml_agent.definitions import (
|
from clearml_agent.definitions import (
|
||||||
@ -686,6 +687,10 @@ class Worker(ServiceCommandSection):
|
|||||||
else:
|
else:
|
||||||
self._docker_args_filters = []
|
self._docker_args_filters = []
|
||||||
|
|
||||||
|
self._task_ping_interval_sec = max(
|
||||||
|
0, text_to_int(self._session.config.get("agent.task_ping_interval_sec", 120.0))
|
||||||
|
)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _verify_command_states(cls, kwargs):
|
def _verify_command_states(cls, kwargs):
|
||||||
"""
|
"""
|
||||||
@ -1737,6 +1742,7 @@ class Worker(ServiceCommandSection):
|
|||||||
stopping = False
|
stopping = False
|
||||||
status = None
|
status = None
|
||||||
process = None
|
process = None
|
||||||
|
last_task_ping = 0
|
||||||
try:
|
try:
|
||||||
_last_machine_update_ts = time()
|
_last_machine_update_ts = time()
|
||||||
stop_reason = None
|
stop_reason = None
|
||||||
@ -1772,6 +1778,17 @@ class Worker(ServiceCommandSection):
|
|||||||
if stderr:
|
if stderr:
|
||||||
stderr.flush()
|
stderr.flush()
|
||||||
|
|
||||||
|
# Send a task ping at most once every self._task_ping_interval_sec seconds;
# an interval of 0 disables the ping entirely.
if self._task_ping_interval_sec and time() - last_task_ping > self._task_ping_interval_sec:
    # noinspection PyBroadException
    try:
        res = (session or self._session).send(tasks_api.PingRequest(task=task_id))
        if not res:
            self.log.error("Failed sending ping for task %s: %s", task_id, res.response)
    except Exception as ex:
        # a failed ping must never interrupt monitoring of the running task
        self.log.error("Failed sending ping: %s", str(ex))
    finally:
        # Remember when this attempt was made (whether it succeeded or not).
        # The timestamp belongs in last_task_ping: storing it in
        # self._task_ping_interval_sec would overwrite the configured interval
        # with an epoch time and no further ping would ever be sent.
        last_task_ping = time()
|
||||||
|
|
||||||
# get diff from previous poll
|
# get diff from previous poll
|
||||||
printed_lines, stdout_pos_count = _print_file(stdout_path, stdout_pos_count)
|
printed_lines, stdout_pos_count = _print_file(stdout_path, stdout_pos_count)
|
||||||
if self._services_mode and not stopping and status is None:
|
if self._services_mode and not stopping and status is None:
|
||||||
|
Loading…
Reference in New Issue
Block a user