mirror of
https://github.com/clearml/clearml
synced 2025-04-22 07:15:57 +00:00
Fix spawn logger/reporting
This commit is contained in:
parent
737ca91d2a
commit
d37aa23fbf
@ -119,7 +119,10 @@ class BackgroundReportService(BackgroundMonitor, AsyncManagerMixin):
|
|||||||
return True
|
return True
|
||||||
if not self.is_alive():
|
if not self.is_alive():
|
||||||
return False
|
return False
|
||||||
|
try:
|
||||||
return not self._res_waiting.get_value()
|
return not self._res_waiting.get_value()
|
||||||
|
except NotImplementedError:
|
||||||
|
return self.get_num_results() > 0
|
||||||
|
|
||||||
|
|
||||||
class Reporter(InterfaceBase, AbstractContextManager, SetupUploadMixin, AsyncManagerMixin):
|
class Reporter(InterfaceBase, AbstractContextManager, SetupUploadMixin, AsyncManagerMixin):
|
||||||
@ -207,6 +210,9 @@ class Reporter(InterfaceBase, AbstractContextManager, SetupUploadMixin, AsyncMan
|
|||||||
else:
|
else:
|
||||||
report_service.send_all_events()
|
report_service.send_all_events()
|
||||||
|
|
||||||
|
def is_alive(self):
|
||||||
|
return self._report_service and self._report_service.is_alive()
|
||||||
|
|
||||||
def get_num_results(self):
|
def get_num_results(self):
|
||||||
return self._report_service.get_num_results()
|
return self._report_service.get_num_results()
|
||||||
|
|
||||||
|
@ -92,8 +92,7 @@ class BackgroundLogService(BackgroundMonitor):
|
|||||||
while self._queue and not self._queue.empty():
|
while self._queue and not self._queue.empty():
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
# request = self._queue.get(block=False)
|
request = self._queue.get(block=False)
|
||||||
request = self._queue.get()
|
|
||||||
if request:
|
if request:
|
||||||
buffer.append(request)
|
buffer.append(request)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
@ -70,7 +70,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
|
|||||||
|
|
||||||
_store_diff = config.get('development.store_uncommitted_code_diff', False)
|
_store_diff = config.get('development.store_uncommitted_code_diff', False)
|
||||||
_store_remote_diff = config.get('development.store_code_diff_from_remote', False)
|
_store_remote_diff = config.get('development.store_code_diff_from_remote', False)
|
||||||
_report_subprocess_enabled = config.get('development.report_use_subprocess', True)
|
_report_subprocess_enabled = config.get('development.report_use_subprocess', sys.platform == 'linux')
|
||||||
_offline_filename = 'task.json'
|
_offline_filename = 'task.json'
|
||||||
|
|
||||||
class TaskTypes(Enum):
|
class TaskTypes(Enum):
|
||||||
@ -1607,7 +1607,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def add_requirements(cls, package_name, package_version=None):
|
def add_requirements(cls, package_name, package_version=None):
|
||||||
# type: (str, Optional[str]) -> ()
|
# type: (str, Optional[str]) -> None
|
||||||
"""
|
"""
|
||||||
Force the adding of a package to the requirements list. If ``package_version`` is None, use the
|
Force the adding of a package to the requirements list. If ``package_version`` is None, use the
|
||||||
installed package version, if found.
|
installed package version, if found.
|
||||||
@ -2091,7 +2091,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def __update_master_pid_task(cls, pid=None, task=None):
|
def __update_master_pid_task(cls, pid=None, task=None):
|
||||||
# type: (Optional[int], Union[str, Task]) -> ()
|
# type: (Optional[int], Union[str, Task]) -> None
|
||||||
pid = pid or os.getpid()
|
pid = pid or os.getpid()
|
||||||
if not task:
|
if not task:
|
||||||
PROC_MASTER_ID_ENV_VAR.set(str(pid) + ':')
|
PROC_MASTER_ID_ENV_VAR.set(str(pid) + ':')
|
||||||
@ -2122,7 +2122,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def set_offline(cls, offline_mode=False):
|
def set_offline(cls, offline_mode=False):
|
||||||
# type: (bool) -> ()
|
# type: (bool) -> None
|
||||||
"""
|
"""
|
||||||
Set offline mode, where all data and logs are stored into local folder, for later transmission
|
Set offline mode, where all data and logs are stored into local folder, for later transmission
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ import os
|
|||||||
import six
|
import six
|
||||||
|
|
||||||
from ..config import TASK_LOG_ENVIRONMENT, running_remotely, config
|
from ..config import TASK_LOG_ENVIRONMENT, running_remotely, config
|
||||||
|
from ..utilities.process.mp import BackgroundMonitor
|
||||||
|
|
||||||
|
|
||||||
class EnvironmentBind(object):
|
class EnvironmentBind(object):
|
||||||
@ -83,10 +84,8 @@ class PatchOsFork(object):
|
|||||||
task = Task.init(project_name=None, task_name=None, task_type=None)
|
task = Task.init(project_name=None, task_name=None, task_type=None)
|
||||||
task.get_logger().flush()
|
task.get_logger().flush()
|
||||||
|
|
||||||
# Hack: now make sure we setup the reporter thread
|
# Hack: now make sure we setup the reporter threads (Log+Reporter)
|
||||||
|
BackgroundMonitor.start_all(task=task)
|
||||||
# noinspection PyProtectedMember
|
|
||||||
task._setup_reporter()
|
|
||||||
|
|
||||||
# TODO: Check if the signal handler method is enough, for the time being, we have both
|
# TODO: Check if the signal handler method is enough, for the time being, we have both
|
||||||
# # if we got here patch the os._exit of our instance to call us
|
# # if we got here patch the os._exit of our instance to call us
|
||||||
|
@ -45,7 +45,7 @@ from .binding.frameworks.xgboost_bind import PatchXGBoostModelIO
|
|||||||
from .binding.joblib_bind import PatchedJoblib
|
from .binding.joblib_bind import PatchedJoblib
|
||||||
from .binding.matplotlib_bind import PatchedMatplotlib
|
from .binding.matplotlib_bind import PatchedMatplotlib
|
||||||
from .binding.hydra_bind import PatchHydra
|
from .binding.hydra_bind import PatchHydra
|
||||||
from .config import config, DEV_TASK_NO_REUSE, get_is_master_node, DEBUG_SIMULATE_REMOTE_TASK
|
from .config import config, DEV_TASK_NO_REUSE, get_is_master_node, DEBUG_SIMULATE_REMOTE_TASK, PROC_MASTER_ID_ENV_VAR
|
||||||
from .config import running_remotely, get_remote_task_id
|
from .config import running_remotely, get_remote_task_id
|
||||||
from .config.cache import SessionCache
|
from .config.cache import SessionCache
|
||||||
from .debugging.log import LoggerRoot
|
from .debugging.log import LoggerRoot
|
||||||
@ -179,6 +179,11 @@ class Task(_Task):
|
|||||||
|
|
||||||
:return: The current running Task (experiment).
|
:return: The current running Task (experiment).
|
||||||
"""
|
"""
|
||||||
|
# check if we have no main Task, but the main process created one.
|
||||||
|
if not cls.__main_task and PROC_MASTER_ID_ENV_VAR.get():
|
||||||
|
# initialize the Task, connect to stdout
|
||||||
|
Task.init()
|
||||||
|
# return main Task
|
||||||
return cls.__main_task
|
return cls.__main_task
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
@ -433,7 +438,7 @@ class Task(_Task):
|
|||||||
|
|
||||||
is_sub_process_task_id = None
|
is_sub_process_task_id = None
|
||||||
# check that we are not a child process, in that case do nothing.
|
# check that we are not a child process, in that case do nothing.
|
||||||
# we should not get here unless this is Windows platform, all others support fork
|
# we should not get here unless this is Windows/macOS platform, linux support fork
|
||||||
if cls.__is_subprocess():
|
if cls.__is_subprocess():
|
||||||
class _TaskStub(object):
|
class _TaskStub(object):
|
||||||
def __call__(self, *args, **kwargs):
|
def __call__(self, *args, **kwargs):
|
||||||
@ -588,6 +593,10 @@ class Task(_Task):
|
|||||||
# something to the log.
|
# something to the log.
|
||||||
task._dev_mode_task_start()
|
task._dev_mode_task_start()
|
||||||
|
|
||||||
|
if (not task._reporter or not task._reporter.is_alive()) and \
|
||||||
|
is_sub_process_task_id and not cls._report_subprocess_enabled:
|
||||||
|
task._setup_reporter()
|
||||||
|
|
||||||
# start monitoring in background process or background threads
|
# start monitoring in background process or background threads
|
||||||
# monitoring are: Resource monitoring and Dev Worker monitoring classes
|
# monitoring are: Resource monitoring and Dev Worker monitoring classes
|
||||||
BackgroundMonitor.start_all(task=task)
|
BackgroundMonitor.start_all(task=task)
|
||||||
@ -2159,7 +2168,7 @@ class Task(_Task):
|
|||||||
secret=None,
|
secret=None,
|
||||||
store_conf_file=False
|
store_conf_file=False
|
||||||
):
|
):
|
||||||
# type: (Optional[str], Optional[str], Optional[str], Optional[str], Optional[str], bool) -> ()
|
# type: (Optional[str], Optional[str], Optional[str], Optional[str], Optional[str], bool) -> None
|
||||||
"""
|
"""
|
||||||
Set new default **ClearML Server** (backend) host and credentials.
|
Set new default **ClearML Server** (backend) host and credentials.
|
||||||
|
|
||||||
@ -2606,7 +2615,7 @@ class Task(_Task):
|
|||||||
try:
|
try:
|
||||||
if 'IPython' in sys.modules:
|
if 'IPython' in sys.modules:
|
||||||
# noinspection PyPackageRequirements
|
# noinspection PyPackageRequirements
|
||||||
from IPython import get_ipython
|
from IPython import get_ipython # noqa
|
||||||
ip = get_ipython()
|
ip = get_ipython()
|
||||||
if ip is not None and 'IPKernelApp' in ip.config:
|
if ip is not None and 'IPKernelApp' in ip.config:
|
||||||
return parser
|
return parser
|
||||||
@ -2868,7 +2877,7 @@ class Task(_Task):
|
|||||||
|
|
||||||
is_sub_process = self.__is_subprocess()
|
is_sub_process = self.__is_subprocess()
|
||||||
|
|
||||||
if not is_sub_process:
|
if True:##not is_sub_process:
|
||||||
# noinspection PyBroadException
|
# noinspection PyBroadException
|
||||||
try:
|
try:
|
||||||
wait_for_uploads = True
|
wait_for_uploads = True
|
||||||
@ -2918,6 +2927,7 @@ class Task(_Task):
|
|||||||
self._wait_for_repo_detection(timeout=10.)
|
self._wait_for_repo_detection(timeout=10.)
|
||||||
|
|
||||||
# kill the repo thread (negative timeout, do not wait), if it hasn't finished yet.
|
# kill the repo thread (negative timeout, do not wait), if it hasn't finished yet.
|
||||||
|
if not is_sub_process:
|
||||||
self._wait_for_repo_detection(timeout=-1)
|
self._wait_for_repo_detection(timeout=-1)
|
||||||
|
|
||||||
# wait for uploads
|
# wait for uploads
|
||||||
|
@ -228,7 +228,7 @@ class BackgroundMonitor(object):
|
|||||||
self._get_instances().append(self)
|
self._get_instances().append(self)
|
||||||
|
|
||||||
def wait(self, timeout=None):
|
def wait(self, timeout=None):
|
||||||
if not self._thread:
|
if not self._done_ev:
|
||||||
return
|
return
|
||||||
self._done_ev.wait(timeout=timeout)
|
self._done_ev.wait(timeout=timeout)
|
||||||
|
|
||||||
@ -264,6 +264,7 @@ class BackgroundMonitor(object):
|
|||||||
self._start_ev.set()
|
self._start_ev.set()
|
||||||
self.daemon()
|
self.daemon()
|
||||||
self.post_execution()
|
self.post_execution()
|
||||||
|
self._thread = None
|
||||||
|
|
||||||
def post_execution(self):
|
def post_execution(self):
|
||||||
self._done_ev.set()
|
self._done_ev.set()
|
||||||
|
Loading…
Reference in New Issue
Block a user