Add task.upload_artifact auto_pickle=True #153

Fix multiple argparse calls before task.init
Add mark_started force argument #159
This commit is contained in:
allegroai 2020-07-02 01:21:00 +03:00
parent efd843d863
commit b07a17890d
2 changed files with 168 additions and 58 deletions

View File

@ -2,6 +2,7 @@ import hashlib
import json import json
import mimetypes import mimetypes
import os import os
import pickle
from six.moves.urllib.parse import quote from six.moves.urllib.parse import quote
from copy import deepcopy from copy import deepcopy
from datetime import datetime from datetime import datetime
@ -160,6 +161,9 @@ class Artifact(object):
elif self.type == 'JSON': elif self.type == 'JSON':
with open(local_file, 'rt') as f: with open(local_file, 'rt') as f:
self._object = json.load(f) self._object = json.load(f)
elif self.type == 'pickle':
with open(local_file, 'rb') as f:
self._object = pickle.load(f)
local_file = Path(local_file) local_file = Path(local_file)
@ -197,6 +201,8 @@ class Artifact(object):
class Artifacts(object): class Artifacts(object):
max_preview_size_bytes = 65536
_flush_frequency_sec = 300. _flush_frequency_sec = 300.
# notice these two should match # notice these two should match
_save_format = '.csv.gz' _save_format = '.csv.gz'
@ -293,8 +299,8 @@ class Artifacts(object):
self._unregister_request.add(name) self._unregister_request.add(name)
self.flush() self.flush()
def upload_artifact(self, name, artifact_object=None, metadata=None, delete_after_upload=False): def upload_artifact(self, name, artifact_object=None, metadata=None, delete_after_upload=False, auto_pickle=True):
# type: (str, Optional[object], Optional[dict], bool) -> bool # type: (str, Optional[object], Optional[dict], bool, bool) -> bool
if not Session.check_min_api_version('2.3'): if not Session.check_min_api_version('2.3'):
LoggerRoot.get_base_logger().warning('Artifacts not supported by your TRAINS-server version, ' LoggerRoot.get_base_logger().warning('Artifacts not supported by your TRAINS-server version, '
'please upgrade to the latest server version') 'please upgrade to the latest server version')
@ -303,6 +309,16 @@ class Artifacts(object):
if name in self._artifacts_container: if name in self._artifacts_container:
raise ValueError("Artifact by the name of {} is already registered, use register_artifact".format(name)) raise ValueError("Artifact by the name of {} is already registered, use register_artifact".format(name))
# convert string to object if try is a file/folder (dont try to serialize long texts
if isinstance(artifact_object, six.string_types) and len(artifact_object) < 2048:
# noinspection PyBroadException
try:
artifact_path = Path(artifact_object)
if artifact_path.exists():
artifact_object = artifact_path
except Exception:
pass
artifact_type_data = tasks.ArtifactTypeData() artifact_type_data = tasks.ArtifactTypeData()
override_filename_in_uri = None override_filename_in_uri = None
override_filename_ext_in_uri = None override_filename_ext_in_uri = None
@ -347,19 +363,15 @@ class Artifacts(object):
fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri) fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
os.write(fd, bytes(preview.encode())) os.write(fd, bytes(preview.encode()))
os.close(fd) os.close(fd)
artifact_type_data.preview = preview if len(preview) < self.max_preview_size_bytes:
artifact_type_data.preview = preview
else:
artifact_type_data.preview = '# full json too large to store, storing first {}kb\n{}'.format(
len(preview)//1024, preview[:self.max_preview_size_bytes]
)
delete_after_upload = True delete_after_upload = True
elif ( elif isinstance(artifact_object, (Path, pathlib_Path,) if pathlib_Path is not None else (Path,)):
isinstance(artifact_object, six.string_types)
and urlparse(artifact_object).scheme in remote_driver_schemes
):
# we should not upload this, just register
local_filename = None
uri = artifact_object
artifact_type = 'custom'
artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
elif isinstance(
artifact_object, six.string_types + (Path, pathlib_Path,) if pathlib_Path is not None else (Path,)):
# check if single file # check if single file
artifact_object = Path(artifact_object) artifact_object = Path(artifact_object)
@ -420,6 +432,32 @@ class Artifacts(object):
artifact_type = 'custom' artifact_type = 'custom'
artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0] artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
local_filename = artifact_object local_filename = artifact_object
elif (
isinstance(artifact_object, six.string_types)
and urlparse(artifact_object).scheme in remote_driver_schemes
):
# we should not upload this, just register
local_filename = None
uri = artifact_object
artifact_type = 'custom'
artifact_type_data.content_type = mimetypes.guess_type(artifact_object)[0]
elif auto_pickle:
# if we are here it means we do not know what to do with the object, so we serialize it with pickle.
artifact_type = 'pickle'
artifact_type_data.content_type = 'application/pickle'
artifact_type_data.preview = str(artifact_object.__repr__())[:self.max_preview_size_bytes]
delete_after_upload = True
override_filename_ext_in_uri = '.pkl'
override_filename_in_uri = name + override_filename_ext_in_uri
fd, local_filename = mkstemp(prefix=quote(name, safe="") + '.', suffix=override_filename_ext_in_uri)
os.close(fd)
try:
with open(local_filename, 'wb') as f:
pickle.dump(artifact_object, f)
except Exception as ex:
# cleanup and raise exception
os.unlink(local_filename)
raise
else: else:
raise ValueError("Artifact type {} not supported".format(type(artifact_object))) raise ValueError("Artifact type {} not supported".format(type(artifact_object)))

View File

@ -80,10 +80,16 @@ class Task(_Task):
For detailed information about creating Task objects, see the following methods: For detailed information about creating Task objects, see the following methods:
- :meth:`Task.init` - Create a new reproducible Task, or reuse one. - Create a new reproducible Task - :meth:`Task.init`
- :meth:`Task.create` - Create a new non-reproducible Task.
- :meth:`Task.current_task` - Get the current running Task. .. important::
- :meth:`Task.get_task` - Get another Task (whose metadata the **Trains Server** maintains). In some cases, ``Task.init`` may return a Task object which is already stored in **Trains Server** (already
initialized), instead of creating a new Task. For a detailed explanation of those cases, see the ``Task.init``
method.
- Create a new non-reproducible Task - :meth:`Task.create`
- Get the current running Task - :meth:`Task.current_task`
- Get another (different) Task - :meth:`Task.get_task`
.. note:: .. note::
The **Trains** documentation often refers to a Task as, "Task (experiment)". The **Trains** documentation often refers to a Task as, "Task (experiment)".
@ -98,8 +104,8 @@ class Task(_Task):
Therefore, a "Task" is effectively an "experiment", and "Task (experiment)" encompasses its usage throughout Therefore, a "Task" is effectively an "experiment", and "Task (experiment)" encompasses its usage throughout
the Trains. the Trains.
The exception to this Task behavior is sub-tasks (non-reproducible Tasks), which do not use the main execution The exception to this Task behavior is sub-tasks (non-reproducible Tasks), which do not use the main execution
Task. Creating a sub-task always creates a new Task with a new Task ID. Task. Creating a sub-task always creates a new Task with a new Task ID.
""" """
TaskTypes = _Task.TaskTypes TaskTypes = _Task.TaskTypes
@ -176,22 +182,43 @@ class Task(_Task):
): ):
# type: (...) -> Task # type: (...) -> Task
""" """
Creates a new Task (experiment), or returns the existing Task, depending upon the following: Creates a new Task (experiment) if:
- If **any** of the following are true, Trains creates a new Task and a new Task ID: - The Task never ran before. No Task with the same ``task_name`` and ``project_name`` is stored in
**Trains Server**.
- The Task has run before (the same ``task_name`` and ``project_name``), and (a) it stored models and / or
artifacts, or (b) its status is Published , or (c) it is Archived.
- A new Task is forced by calling ``Task.init`` with ``reuse_last_task_id=False``.
- a Task in the same project with same name does not exist, **or** Otherwise, the already initialized Task object for the same ``task_name`` and ``project_name`` is returned.
- a Task in the same project with same name does exist and its status is ``Published``, **or**
- the ``reuse_last_task_id`` parameter is assigned ``False``.
- If **all** of the following are true, Trains returns the existing Task with the existing Task ID: .. note::
To reference another Task, instead of initializing the same Task more than once, call
:meth:`Task.get_task`. For example, to "share" the same experiment in more than one script,
call ``Task.get_task``. See the ``Task.get_task`` method for an example.
- a Task in the same project with the same name does exist, **and** For example:
- the Task's status is ``Draft``, ``Completed``, ``Failed``, or ``Aborted``, **and**
- the ``reuse_last_task_id`` parameter is the default value of ``True``.
.. warning:: The first time the following code runs, it will create a new Task. The status will be Completed.
When a Python experiment script runs using an existing Task, it overwrites previous experiment output.
.. code-block:: py
from trains import Task
task = Task.init('myProject', 'myTask')
If this code runs again, it will not create a new Task. It does not store a model or artifact,
it is not Published (its status Completed) , it was not Archived, and a new Task is not forced.
If the Task is Published or Archived, and run again, it will create a new Task with a new Task ID.
The following code will create a new Task every time it runs, because it stores an artifact.
.. code-block:: py
task = Task.init('myProject', 'myOtherTask')
d = {'a': '1'}
task.upload_artifact('myArtifact', d)
:param str project_name: The name of the project in which the experiment will be created. If the project does :param str project_name: The name of the project in which the experiment will be created. If the project does
not exist, it is created. If ``project_name`` is ``None``, the repository name is used. (Optional) not exist, it is created. If ``project_name`` is ``None``, the repository name is used. (Optional)
@ -464,8 +491,8 @@ class Task(_Task):
# Check if parse args already called. If so, sync task parameters with parser # Check if parse args already called. If so, sync task parameters with parser
if argparser_parseargs_called(): if argparser_parseargs_called():
parser, parsed_args = get_argparser_last_args() for parser, parsed_args in get_argparser_last_args():
task._connect_argparse(parser=parser, parsed_args=parsed_args) task._connect_argparse(parser=parser, parsed_args=parsed_args)
elif argparser_parseargs_called(): elif argparser_parseargs_called():
# actually we have nothing to do, in remote running, the argparser will ignore # actually we have nothing to do, in remote running, the argparser will ignore
# all non argparser parameters, only caveat if parameter connected with the same name # all non argparser parameters, only caveat if parameter connected with the same name
@ -494,8 +521,8 @@ class Task(_Task):
Create a new, non-reproducible Task (experiment). This is called a sub-task. Create a new, non-reproducible Task (experiment). This is called a sub-task.
.. note:: .. note::
- This method always creates a new Task. This method always creates a new, non-reproducible Task. To create a reproducible Task, call the
- To create reproducible Tasks, use the :meth:`Task.init` method. :meth:`Task.init` method. To reference another Task, call the :meth:`Task.get_task` method .
:param str project_name: The name of the project in which the experiment will be created. :param str project_name: The name of the project in which the experiment will be created.
If ``project_name`` is ``None``, and the main execution Task is initialized (see :meth:`Task.init`), If ``project_name`` is ``None``, and the main execution Task is initialized (see :meth:`Task.init`),
@ -545,6 +572,36 @@ class Task(_Task):
""" """
Get a Task by Id, or project name / task name combination. Get a Task by Id, or project name / task name combination.
For example:
The following code demonstrates calling ``Task.get_task`` to report a scalar to another Task. The output
of :meth:`.Logger.report_scalar` from testing is associated with the Task named ``training``. It allows
training and testing to run concurrently, because they initialized different Tasks (see :meth:`Task.init`
for information about initializing Tasks).
The training script:
.. code-block:: py
# initialize the training Task
task = Task.init('myProject', 'training')
# do some training
The testing script:
.. code-block:: py
# initialize the testing Task
task = Task.init('myProject', 'testing')
# get the training Task
train_task = Task.get_task(project_name='myProject', task_name='training')
# report metrics in the training Task
for x in range(10):
train_task.get_logger().report_scalar('title', 'series', value=x * 2, iteration=x)
:param str task_id: The Id (system UUID) of the experiment to get. :param str task_id: The Id (system UUID) of the experiment to get.
If specified, ``project_name`` and ``task_name`` are ignored. If specified, ``project_name`` and ``task_name`` are ignored.
:param str project_name: The project name of the Task to get. :param str project_name: The project name of the Task to get.
@ -968,22 +1025,28 @@ class Task(_Task):
""" """
return self._get_logger() return self._get_logger()
def mark_started(self): def mark_started(self, force=False):
# type: (bool) -> ()
""" """
Manually mark a Task as started (happens automatically) Manually mark a Task as started (happens automatically)
:param bool force: If True the task status will be changed to `started` regardless of the current Task state.
""" """
# UI won't let us see metrics if we're not started # UI won't let us see metrics if we're not started
self.started() self.started(force=force)
self.reload() self.reload()
def mark_stopped(self): def mark_stopped(self, force=False):
# type: (bool) -> ()
""" """
Manually mark a Task as stopped (also used in :meth:`_at_exit`) Manually mark a Task as stopped (also used in :meth:`_at_exit`)
:param bool force: If True the task status will be changed to `stopped` regardless of the current Task state.
""" """
# flush any outstanding logs # flush any outstanding logs
self.flush(wait_for_uploads=True) self.flush(wait_for_uploads=True)
# mark task as stopped # mark task as stopped
self.stopped() self.stopped(force=force)
def flush(self, wait_for_uploads=False): def flush(self, wait_for_uploads=False):
# type: (bool) -> bool # type: (bool) -> bool
@ -1114,9 +1177,10 @@ class Task(_Task):
def upload_artifact( def upload_artifact(
self, self,
name, # type: str name, # type: str
artifact_object, # type: Union[str, Mapping, pandas.DataFrame, numpy.ndarray, Image.Image] artifact_object, # type: Union[str, Mapping, pandas.DataFrame, numpy.ndarray, Image.Image, Any]
metadata=None, # type: Optional[Mapping] metadata=None, # type: Optional[Mapping]
delete_after_upload=False # type: bool delete_after_upload=False, # type: bool
auto_pickle=True, # type: bool
): ):
# type: (...) -> bool # type: (...) -> bool
""" """
@ -1130,6 +1194,7 @@ class Task(_Task):
- pandas.DataFrame - Trains stores a pandas.DataFrame as ``.csv.gz`` (compressed CSV) file and uploads it. - pandas.DataFrame - Trains stores a pandas.DataFrame as ``.csv.gz`` (compressed CSV) file and uploads it.
- numpy.ndarray - Trains stores a numpy.ndarray as ``.npz`` file and uploads it. - numpy.ndarray - Trains stores a numpy.ndarray as ``.npz`` file and uploads it.
- PIL.Image - Trains stores a PIL.Image as ``.png`` file and uploads it. - PIL.Image - Trains stores a PIL.Image as ``.png`` file and uploads it.
- Any - If called with auto_pickle=True, the object will be pickled and uploaded.
:param str name: The artifact name. :param str name: The artifact name.
@ -1144,6 +1209,10 @@ class Task(_Task):
- ``True`` - Delete the local copy of the artifact. - ``True`` - Delete the local copy of the artifact.
- ``False`` - Do not delete. (default) - ``False`` - Do not delete. (default)
:param bool auto_pickle: If True (default) and the artifact_object is not one of the following types:
pathlib2.Path, dict, pandas.DataFrame, numpy.ndarray, PIL.Image, url (string), local_file (string)
the artifact_object will be pickled and uploaded as pickle file artifact (with file extension .pkl)
:return: The status of the upload. :return: The status of the upload.
- ``True`` - Upload succeeded. - ``True`` - Upload succeeded.
@ -1151,8 +1220,9 @@ class Task(_Task):
:raise: If the artifact object type is not supported, raise a ``ValueError``. :raise: If the artifact object type is not supported, raise a ``ValueError``.
""" """
return self._artifacts_manager.upload_artifact(name=name, artifact_object=artifact_object, return self._artifacts_manager.upload_artifact(
metadata=metadata, delete_after_upload=delete_after_upload) name=name, artifact_object=artifact_object, metadata=metadata,
delete_after_upload=delete_after_upload, auto_pickle=auto_pickle)
def get_models(self): def get_models(self):
# type: () -> Dict[str, Sequence[Model]] # type: () -> Dict[str, Sequence[Model]]
@ -1269,16 +1339,6 @@ class Task(_Task):
self._reload_last_iteration() self._reload_last_iteration()
return max(self.data.last_iteration, self._reporter.max_iteration if self._reporter else 0) return max(self.data.last_iteration, self._reporter.max_iteration if self._reporter else 0)
def set_last_iteration(self, last_iteration):
# type: (int) -> None
"""
Forcefully set the last reported iteration, which is the last iteration for which the Task reported a metric.
:param int last_iteration: The last reported iteration number.
"""
self.data.last_iteration = int(last_iteration)
self._edit(last_iteration=self.data.last_iteration)
def set_initial_iteration(self, offset=0): def set_initial_iteration(self, offset=0):
# type: (int) -> int # type: (int) -> int
""" """
@ -1632,6 +1692,10 @@ class Task(_Task):
# set default docker image from env. # set default docker image from env.
task._set_default_docker_image() task._set_default_docker_image()
# mark us as the main Task, there should only be one dev Task at a time.
if not Task.__main_task:
Task.__main_task = task
# mark the task as started # mark the task as started
task.started() task.started()
# reload, making sure we are synced # reload, making sure we are synced
@ -1794,11 +1858,19 @@ class Task(_Task):
argparser_update_currenttask(self) argparser_update_currenttask(self)
if (parser is None or parsed_args is None) and argparser_parseargs_called(): if (parser is None or parsed_args is None) and argparser_parseargs_called():
_parser, _parsed_args = get_argparser_last_args() # if we have a parser but nor parsed_args, we need to find the parser
if parser is None: if parser and not parsed_args:
parser = _parser for _parser, _parsed_args in get_argparser_last_args():
if parsed_args is None and parser == _parser: if _parser == parser:
parsed_args = _parsed_args parsed_args = _parsed_args
break
else:
# prefer the first argparser (hopefully it is more relevant?!
for _parser, _parsed_args in get_argparser_last_args():
if parser is None:
parser = _parser
if parsed_args is None and parser == _parser:
parsed_args = _parsed_args
if running_remotely() and self.is_main_task(): if running_remotely() and self.is_main_task():
self._arguments.copy_to_parser(parser, parsed_args) self._arguments.copy_to_parser(parser, parsed_args)