Add hyperparameter sections support to pipeline decorator (#629)
Fix pipeline add_function_step breaks in remote execution

parent cb36da3ded
commit 2f42fc4830
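
In short: the `@PipelineDecorator.pipeline` decorator gains an `args_map` option that routes pipeline arguments into named hyperparameter sections, and reference verification plus progress reporting are fixed for remote runs. A minimal usage sketch of the new option (illustrative project/parameter names; the `version` value is a placeholder):

    from clearml import PipelineDecorator

    # paramA lands in sectionA/paramA, paramB and paramC in sectionB/...,
    # and paramD (not listed in args_map) falls back to the default Args section.
    @PipelineDecorator.pipeline(
        name='pipeline demo', project='examples', version='0.1',
        args_map={'sectionA': ['paramA'], 'sectionB': ['paramB', 'paramC']},
    )
    def executing_pipeline(paramA=1, paramB=2, paramC=3, paramD=4):
        pass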
@@ -53,6 +53,7 @@ class PipelineController(object):
     _task_project_lookup = {}
     _clearml_job_class = ClearmlJob
     _update_execution_plot_interval = 5.*60
+    _update_progress_interval = 10.
     _monitor_node_interval = 5.*60
     _report_plot_execution_flow = dict(title='Pipeline', series='Execution Flow')
     _report_plot_execution_details = dict(title='Pipeline Details', series='Execution Details')
@@ -152,6 +153,7 @@ class PipelineController(object):
         self._pipeline_args = dict()
         self._pipeline_args_desc = dict()
         self._pipeline_args_type = dict()
+        self._args_map = dict()
         self._stop_event = None
         self._experiment_created_cb = None
         self._experiment_completed_cb = None
@@ -166,6 +168,7 @@ class PipelineController(object):
         self._auto_version_bump = bool(auto_version_bump)
         self._mock_execution = False  # used for nested pipelines (eager execution)
         self._pipeline_as_sub_project = bool(Session.check_min_api_server_version("2.17"))
+        self._last_progress_update_time = 0
         if not self._task:
             task_name = name or project or '{}'.format(datetime.now())
             if self._pipeline_as_sub_project:
@@ -492,7 +495,7 @@ class PipelineController(object):
 
             create_task_from_function(
                 mock_func,
-                function_input_artifacts={'matrix_np': 'aabb1122.previous_matrix'},
+                function_kwargs={'matrix_np': 'aabb1122.previous_matrix'},
                 function_return=['square_matrix']
             )
 
@@ -504,7 +507,7 @@ class PipelineController(object):
             Example argument named `numpy_matrix` from Task ID `aabbcc` artifact name `answer`:
             {'numpy_matrix': 'aabbcc.answer'}
         :param function_return: Provide a list of names for all the results.
-            If not provided no results will be stored as artifacts.
+            If not provided, no results will be stored as artifacts.
         :param project_name: Set the project name for the task. Required if base_task_id is None.
         :param task_name: Set the name of the remote task, if not provided use `name` argument.
         :param task_type: Optional, The task type to be created. Supported values: 'training', 'testing', 'inference',
@@ -617,8 +620,10 @@ class PipelineController(object):
                     if step in self._nodes and artifact in self._nodes[step].return_artifacts:
                         function_input_artifacts[k] = "${{{}.id}}.{}".format(step, artifact)
                         continue
-                    # verify the reference
-                    self.__verify_step_reference(node=self.Node(name=name), step_ref_string=v)
+                    # verify the reference only if we are running locally (on remote when we have multiple
+                    # steps from tasks the _nodes is still empty, only after deserializing we will have the full DAG)
+                    if self._task.running_locally():
+                        self.__verify_step_reference(node=self.Node(name=name), step_ref_string=v)
 
         function_kwargs = {k: v for k, v in function_kwargs.items() if k not in function_input_artifacts}
         parameters = {"{}/{}".format(CreateFromFunction.kwargs_section, k): v for k, v in function_kwargs.items()}
@@ -688,9 +693,6 @@ class PipelineController(object):
             job_code_section=job_code_section,
         )
 
-        if self._task and not self._task.running_locally() and not self._mock_execution:
-            self.update_execution_plot()
-
         return True
 
     def start(
@@ -899,7 +901,7 @@ class PipelineController(object):
 
         :param Any preview: The artifact preview
 
-        :param bool wait_on_upload: Whether or not the upload should be synchronous, forcing the upload to complete
+        :param bool wait_on_upload: Whether the upload should be synchronous, forcing the upload to complete
             before continuing.
 
         :return: The status of the upload.
@@ -1225,10 +1227,17 @@ class PipelineController(object):
             self._task._set_configuration(
                 name=self._config_section, config_type='dictionary',
                 config_text=json.dumps(pipeline_dag, indent=2))
+            args_map_inversed = {}
+            for section, arg_list in self._args_map.items():
+                for arg in arg_list:
+                    args_map_inversed[arg] = section
             pipeline_args = flatten_dictionary(self._pipeline_args)
             # noinspection PyProtectedMember
             self._task._set_parameters(
-                {'{}/{}'.format(self._args_section, k): v for k, v in pipeline_args.items()},
+                {
+                    "{}/{}".format(args_map_inversed.get(k, self._args_section), k): v
+                    for k, v in pipeline_args.items()
+                },
                 __parameters_descriptions=self._pipeline_args_desc,
                 __parameters_types=self._pipeline_args_type,
                 __update=True,
@@ -1249,7 +1258,19 @@ class PipelineController(object):
             })
         else:
             self._task.connect_configuration(pipeline_dag, name=self._config_section)
-            self._task.connect(self._pipeline_args, name=self._args_section)
+            connected_args = set()
+            new_pipeline_args = {}
+            for section, arg_list in self._args_map.items():
+                mutable_dict = {arg: self._pipeline_args.get(arg) for arg in arg_list}
+                self._task.connect(mutable_dict, name=section)
+                new_pipeline_args.update(mutable_dict)
+                connected_args.update(arg_list)
+            mutable_dict = {k: v for k, v in self._pipeline_args.items() if k not in connected_args}
+            self._task.connect(
+                mutable_dict, name=self._args_section
+            )
+            new_pipeline_args.update(mutable_dict)
+            self._pipeline_args = new_pipeline_args
             self._task.connect(params, name=self._pipeline_section)
         # noinspection PyProtectedMember
         if self._task._get_runtime_properties().get(self._runtime_property_hash):
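
Taken together, the two branches above route every flattened pipeline argument into `<section>/<name>`, falling back to the default `Args` section for arguments not listed in `args_map`. A standalone sketch of the remote-execution mapping (assumed argument names and values):

    args_map = {'sectionA': ['paramA'], 'sectionB': ['paramB', 'paramC']}

    # invert section -> [args] into arg -> section, as in the serialization code above
    args_map_inversed = {}
    for section, arg_list in args_map.items():
        for arg in arg_list:
            args_map_inversed[arg] = section

    pipeline_args = {'paramA': 1, 'paramB': 2, 'paramC': 3, 'paramD': 4}
    print({
        '{}/{}'.format(args_map_inversed.get(k, 'Args'), k): v
        for k, v in pipeline_args.items()
    })
    # {'sectionA/paramA': 1, 'sectionB/paramB': 2, 'sectionB/paramC': 3, 'Args/paramD': 4}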
@@ -1688,6 +1709,10 @@ class PipelineController(object):
                 sankey_link['target'].append(idx)
                 sankey_link['value'].append(1)
 
+            # if nothing changed, we give up
+            if nodes == next_nodes:
+                break
+
             nodes = next_nodes
 
         # make sure we have no independent (unconnected) nodes
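
The added guard is a simple fixed-point check: when one more pass over the graph produces no change, the loop exits instead of spinning forever on malformed input. A generic standalone sketch of the pattern (hypothetical `expand` step, not the controller's real traversal):

    def expand(s):
        # hypothetical one-pass expansion; the real loop walks pipeline node parents
        return s | {x[:-1] for x in s if len(x) > 1}

    nodes = {'abc'}
    while True:
        next_nodes = expand(nodes)
        # if nothing changed, we give up (same guard as in the diff above)
        if nodes == next_nodes:
            break
        nodes = next_nodes
    print(sorted(nodes))  # ['a', 'ab', 'abc']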
@@ -1879,6 +1904,19 @@ class PipelineController(object):
                 description="pipeline state: {}".format(hash_dict(pipeline_dag)),
                 config_text=json.dumps(pipeline_dag, indent=2))
 
+    def _update_progress(self):
+        # type: () -> ()
+        """
+        Update progress of the pipeline every PipelineController._update_progress_interval seconds.
+        Progress is calculated as the mean of the progress of each step in the pipeline.
+        """
+        if time() - self._last_progress_update_time < self._update_progress_interval:
+            return
+        job_progress = [(node.job.task.get_progress() or 0) if node.job else 0 for node in self._nodes.values()]
+        if len(job_progress):
+            self._task.set_progress(int(sum(job_progress) / len(job_progress)))
+        self._last_progress_update_time = time()
+
     def _daemon(self):
         # type: () -> ()
         """
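
For illustration, a standalone sketch (hypothetical progress values) of the averaging rule `_update_progress` applies, where a step with no job yet counts as 0:

    job_progress = [100, 45, 0]  # e.g. one finished step, one mid-run, one not started
    overall = int(sum(job_progress) / len(job_progress)) if job_progress else 0
    print(overall)  # 48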
@@ -1900,6 +1938,7 @@ class PipelineController(object):
             if self._pipeline_time_limit and (time() - self._start_time) > self._pipeline_time_limit:
                 break
 
+            self._update_progress()
             # check the state of all current jobs
             # if no a job ended, continue
             completed_jobs = []
@@ -2534,6 +2573,7 @@ class PipelineDecorator(PipelineController):
             if self._pipeline_time_limit and (time() - self._start_time) > self._pipeline_time_limit:
                 break
 
+            self._update_progress()
             # check the state of all current jobs
             # if no a job ended, continue
             completed_jobs = []
@@ -3088,7 +3128,8 @@ class PipelineDecorator(PipelineController):
             abort_on_failure=False,  # type: bool
             pipeline_execution_queue='services',  # type: Optional[str]
             multi_instance_support=False,  # type: bool
-            add_run_number=True  # type: bool
+            add_run_number=True,  # type: bool
+            args_map=None  # type: dict[str, List[str]]
     ):
         # type: (...) -> Callable
         """
@@ -3122,6 +3163,19 @@ class PipelineDecorator(PipelineController):
             Default False, no multi instance pipeline support.
         :param add_run_number: If True (default), add the run number of the pipeline to the pipeline name.
             Example, the second time we launch the pipeline "best pipeline", we rename it to "best pipeline #2"
+        :param args_map: Map arguments to their specific configuration section. Arguments not included in this map
+            will default to `Args` section. For example, for the following code:
+
+            .. code-block:: python
+               @PipelineDecorator.pipeline(args_map={'sectionA': ['paramA'], 'sectionB': ['paramB', 'paramC']})
+               def executing_pipeline(paramA, paramB, paramC, paramD):
+                   pass
+
+            Parameters would be stored as:
+            - paramA: sectionA/paramA
+            - paramB: sectionB/paramB
+            - paramC: sectionB/paramC
+            - paramD: Args/paramD
         """
         def decorator_wrap(func):
 
@@ -3201,6 +3255,8 @@ class PipelineDecorator(PipelineController):
                 add_run_number=add_run_number,
             )
 
+            a_pipeline._args_map = args_map or {}
+
             if PipelineDecorator._debug_execute_step_process:
                 a_pipeline._clearml_job_class = LocalClearmlJob
                 a_pipeline._default_execution_queue = 'mock'