mirror of
https://github.com/clearml/clearml
synced 2025-04-06 21:54:36 +00:00
Fix spawn logger/reporting
This commit is contained in:
parent
737ca91d2a
commit
d37aa23fbf
@ -119,7 +119,10 @@ class BackgroundReportService(BackgroundMonitor, AsyncManagerMixin):
|
||||
return True
|
||||
if not self.is_alive():
|
||||
return False
|
||||
return not self._res_waiting.get_value()
|
||||
try:
|
||||
return not self._res_waiting.get_value()
|
||||
except NotImplementedError:
|
||||
return self.get_num_results() > 0
|
||||
|
||||
|
||||
class Reporter(InterfaceBase, AbstractContextManager, SetupUploadMixin, AsyncManagerMixin):
|
||||
@ -207,6 +210,9 @@ class Reporter(InterfaceBase, AbstractContextManager, SetupUploadMixin, AsyncMan
|
||||
else:
|
||||
report_service.send_all_events()
|
||||
|
||||
def is_alive(self):
|
||||
return self._report_service and self._report_service.is_alive()
|
||||
|
||||
def get_num_results(self):
|
||||
return self._report_service.get_num_results()
|
||||
|
||||
|
@ -92,8 +92,7 @@ class BackgroundLogService(BackgroundMonitor):
|
||||
while self._queue and not self._queue.empty():
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
# request = self._queue.get(block=False)
|
||||
request = self._queue.get()
|
||||
request = self._queue.get(block=False)
|
||||
if request:
|
||||
buffer.append(request)
|
||||
except Exception:
|
||||
|
@ -70,7 +70,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
|
||||
|
||||
_store_diff = config.get('development.store_uncommitted_code_diff', False)
|
||||
_store_remote_diff = config.get('development.store_code_diff_from_remote', False)
|
||||
_report_subprocess_enabled = config.get('development.report_use_subprocess', True)
|
||||
_report_subprocess_enabled = config.get('development.report_use_subprocess', sys.platform == 'linux')
|
||||
_offline_filename = 'task.json'
|
||||
|
||||
class TaskTypes(Enum):
|
||||
@ -1607,7 +1607,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
|
||||
|
||||
@classmethod
|
||||
def add_requirements(cls, package_name, package_version=None):
|
||||
# type: (str, Optional[str]) -> ()
|
||||
# type: (str, Optional[str]) -> None
|
||||
"""
|
||||
Force the adding of a package to the requirements list. If ``package_version`` is None, use the
|
||||
installed package version, if found.
|
||||
@ -2091,7 +2091,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
|
||||
|
||||
@classmethod
|
||||
def __update_master_pid_task(cls, pid=None, task=None):
|
||||
# type: (Optional[int], Union[str, Task]) -> ()
|
||||
# type: (Optional[int], Union[str, Task]) -> None
|
||||
pid = pid or os.getpid()
|
||||
if not task:
|
||||
PROC_MASTER_ID_ENV_VAR.set(str(pid) + ':')
|
||||
@ -2122,7 +2122,7 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
|
||||
|
||||
@classmethod
|
||||
def set_offline(cls, offline_mode=False):
|
||||
# type: (bool) -> ()
|
||||
# type: (bool) -> None
|
||||
"""
|
||||
Set offline mode, where all data and logs are stored into local folder, for later transmission
|
||||
|
||||
|
@ -3,6 +3,7 @@ import os
|
||||
import six
|
||||
|
||||
from ..config import TASK_LOG_ENVIRONMENT, running_remotely, config
|
||||
from ..utilities.process.mp import BackgroundMonitor
|
||||
|
||||
|
||||
class EnvironmentBind(object):
|
||||
@ -83,10 +84,8 @@ class PatchOsFork(object):
|
||||
task = Task.init(project_name=None, task_name=None, task_type=None)
|
||||
task.get_logger().flush()
|
||||
|
||||
# Hack: now make sure we setup the reporter thread
|
||||
|
||||
# noinspection PyProtectedMember
|
||||
task._setup_reporter()
|
||||
# Hack: now make sure we setup the reporter threads (Log+Reporter)
|
||||
BackgroundMonitor.start_all(task=task)
|
||||
|
||||
# TODO: Check if the signal handler method is enough, for the time being, we have both
|
||||
# # if we got here patch the os._exit of our instance to call us
|
||||
|
@ -45,7 +45,7 @@ from .binding.frameworks.xgboost_bind import PatchXGBoostModelIO
|
||||
from .binding.joblib_bind import PatchedJoblib
|
||||
from .binding.matplotlib_bind import PatchedMatplotlib
|
||||
from .binding.hydra_bind import PatchHydra
|
||||
from .config import config, DEV_TASK_NO_REUSE, get_is_master_node, DEBUG_SIMULATE_REMOTE_TASK
|
||||
from .config import config, DEV_TASK_NO_REUSE, get_is_master_node, DEBUG_SIMULATE_REMOTE_TASK, PROC_MASTER_ID_ENV_VAR
|
||||
from .config import running_remotely, get_remote_task_id
|
||||
from .config.cache import SessionCache
|
||||
from .debugging.log import LoggerRoot
|
||||
@ -179,6 +179,11 @@ class Task(_Task):
|
||||
|
||||
:return: The current running Task (experiment).
|
||||
"""
|
||||
# check if we have no main Task, but the main process created one.
|
||||
if not cls.__main_task and PROC_MASTER_ID_ENV_VAR.get():
|
||||
# initialize the Task, connect to stdout
|
||||
Task.init()
|
||||
# return main Task
|
||||
return cls.__main_task
|
||||
|
||||
@classmethod
|
||||
@ -433,7 +438,7 @@ class Task(_Task):
|
||||
|
||||
is_sub_process_task_id = None
|
||||
# check that we are not a child process, in that case do nothing.
|
||||
# we should not get here unless this is Windows platform, all others support fork
|
||||
# we should not get here unless this is Windows/macOS platform, linux support fork
|
||||
if cls.__is_subprocess():
|
||||
class _TaskStub(object):
|
||||
def __call__(self, *args, **kwargs):
|
||||
@ -588,6 +593,10 @@ class Task(_Task):
|
||||
# something to the log.
|
||||
task._dev_mode_task_start()
|
||||
|
||||
if (not task._reporter or not task._reporter.is_alive()) and \
|
||||
is_sub_process_task_id and not cls._report_subprocess_enabled:
|
||||
task._setup_reporter()
|
||||
|
||||
# start monitoring in background process or background threads
|
||||
# monitoring are: Resource monitoring and Dev Worker monitoring classes
|
||||
BackgroundMonitor.start_all(task=task)
|
||||
@ -2159,7 +2168,7 @@ class Task(_Task):
|
||||
secret=None,
|
||||
store_conf_file=False
|
||||
):
|
||||
# type: (Optional[str], Optional[str], Optional[str], Optional[str], Optional[str], bool) -> ()
|
||||
# type: (Optional[str], Optional[str], Optional[str], Optional[str], Optional[str], bool) -> None
|
||||
"""
|
||||
Set new default **ClearML Server** (backend) host and credentials.
|
||||
|
||||
@ -2606,7 +2615,7 @@ class Task(_Task):
|
||||
try:
|
||||
if 'IPython' in sys.modules:
|
||||
# noinspection PyPackageRequirements
|
||||
from IPython import get_ipython
|
||||
from IPython import get_ipython # noqa
|
||||
ip = get_ipython()
|
||||
if ip is not None and 'IPKernelApp' in ip.config:
|
||||
return parser
|
||||
@ -2868,7 +2877,7 @@ class Task(_Task):
|
||||
|
||||
is_sub_process = self.__is_subprocess()
|
||||
|
||||
if not is_sub_process:
|
||||
if True:##not is_sub_process:
|
||||
# noinspection PyBroadException
|
||||
try:
|
||||
wait_for_uploads = True
|
||||
@ -2918,7 +2927,8 @@ class Task(_Task):
|
||||
self._wait_for_repo_detection(timeout=10.)
|
||||
|
||||
# kill the repo thread (negative timeout, do not wait), if it hasn't finished yet.
|
||||
self._wait_for_repo_detection(timeout=-1)
|
||||
if not is_sub_process:
|
||||
self._wait_for_repo_detection(timeout=-1)
|
||||
|
||||
# wait for uploads
|
||||
print_done_waiting = False
|
||||
|
@ -228,7 +228,7 @@ class BackgroundMonitor(object):
|
||||
self._get_instances().append(self)
|
||||
|
||||
def wait(self, timeout=None):
|
||||
if not self._thread:
|
||||
if not self._done_ev:
|
||||
return
|
||||
self._done_ev.wait(timeout=timeout)
|
||||
|
||||
@ -264,6 +264,7 @@ class BackgroundMonitor(object):
|
||||
self._start_ev.set()
|
||||
self.daemon()
|
||||
self.post_execution()
|
||||
self._thread = None
|
||||
|
||||
def post_execution(self):
|
||||
self._done_ev.set()
|
||||
|
Loading…
Reference in New Issue
Block a user