	Add clearml-data set-description and rename options
This commit is contained in:
commit df1c134a7a (parent ae8b8e79d0)
clearml/backend_interface/util.py:

@@ -2,7 +2,7 @@ import getpass
 import re
 from _socket import gethostname
 from datetime import datetime
-from typing import Optional
+from typing import Optional, Any

 try:
     from datetime import timezone
@@ -63,6 +63,26 @@ def get_existing_project(session, project_name):
     return ""


+def rename_project(session, project_name, new_project_name):
+    # type: (Any, str, str) -> bool
+    """
+    Rename a project
+
+    :param session: Session to send the request through
+    :param project_name: Name of the project you want to rename
+    :param new_project_name: New name for the project
+
+    :return: True if the rename succeeded and False otherwise
+    """
+    project_id = get_existing_project(session, project_name)
+    if not project_id:
+        return False
+    res = session.send(projects.UpdateRequest(project=project_id, name=new_project_name))
+    if res and res.response and res.response.updated:
+        return True
+    return False
+
+
 def get_or_create_project(session, project_name, description=None, system_tags=None, project_id=None):
     """Return the ID of an existing project, or if it does not exist, make a new one and return that ID instead."""
     project_system_tags = []
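A minimal usage sketch for the new helper, assuming `projects` refers to the `projects` service from `clearml.backend_api.services` (imported elsewhere in this module) and reusing the SDK's default session the way `Dataset.rename()` does further below:

    from clearml import Task
    from clearml.backend_interface.util import rename_project

    # noinspection PyProtectedMember
    session = Task._get_default_session()  # internal helper, same session Dataset.rename() passes in
    if rename_project(session, "old_project", "new_project"):
        print("project renamed")
    else:
        print("rename failed or project not found")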
clearml-data CLI entry point:

@@ -104,6 +104,21 @@ def cli():
     add.add_argument('--verbose', action='store_true', default=False, help='Verbose reporting')
     add.set_defaults(func=ds_add)

+    set_description = subparsers.add_parser("set-description", help="Set the description of a dataset")
+    set_description.add_argument(
+        "--description",
+        type=str,
+        required=True,
+        help="Description of the dataset",
+    )
+    set_description.add_argument(
+        "--id",
+        type=str,
+        required=False,
+        help="Previously created dataset id. Default: previously created/accessed dataset",
+    )
+    set_description.set_defaults(func=ds_set_description)
+
     sync = subparsers.add_parser('sync', help='Sync a local folder with the dataset')
     sync.add_argument('--id', type=str, required=False,
                       help='Previously created dataset id. Default: previously created/accessed dataset')
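With the subparser above wired to `ds_set_description`, a shell invocation would look like this (the dataset id is a placeholder; omitting --id falls back to the previously created/accessed dataset):

    clearml-data set-description --id <dataset-id> --description "Deduplicated training images"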
@@ -120,6 +135,13 @@ def cli():
     sync.add_argument('--name', type=str, required=False, default=None,
                       help='[Optional] Dataset project name')
     sync.add_argument("--version", type=str, required=False, default=None, help="[Optional] Dataset version")
+    sync.add_argument(
+        "--output-uri",
+        type=str,
+        required=False,
+        default=None,
+        help="[Optional] Output URI for artifacts/debug samples. Usable when creating the dataset",
+    )
     sync.add_argument('--tags', type=str, nargs='*',
                       help='[Optional] Dataset user Tags')
     sync.add_argument('--storage', type=str, default=None,
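A hypothetical `sync` call exercising the new flag, assuming the sync parser's existing --folder and --project arguments (the S3 path is illustrative; per the help text the URI only takes effect when the sync creates the dataset):

    clearml-data sync --folder ./local_data --project MyProject --name my_dataset --output-uri s3://my-bucket/datasets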
@@ -178,13 +200,43 @@ def cli():
     publish.add_argument('--id', type=str, required=True, help='The dataset task id to be published.')
     publish.set_defaults(func=ds_publish)

-    delete = subparsers.add_parser('delete', help='Delete a dataset')
-    delete.add_argument('--id', type=str, required=False,
-                        help='Previously created dataset id. Default: previously created/accessed dataset')
-    delete.add_argument('--force', action='store_true', default=False,
-                        help='Force dataset deletion even if other dataset versions depend on it')
+    delete = subparsers.add_parser("delete", help="Delete a dataset")
+    delete.add_argument(
+        "--id",
+        type=str,
+        required=False,
+        help="Previously created dataset id. Default: previously created/accessed dataset",
+    )
+    delete.add_argument(
+        "--project", type=str, required=False, help="The project the dataset(s) to be deleted belong(s) to"
+    )
+    delete.add_argument("--name", type=str, required=False, help="The name of the dataset(s) to be deleted")
+    delete.add_argument("--version", type=str, required=False, help="The version of the dataset(s) to be deleted")
+    delete.add_argument(
+        "--force",
+        action="store_true",
+        default=False,
+        help="Force dataset deletion even if other dataset versions depend on it. Must also be used if the --entire-dataset flag is used",
+    )
+    delete.add_argument("--entire-dataset", action="store_true", default=False, help="Delete all found datasets")
     delete.set_defaults(func=ds_delete)

+    rename = subparsers.add_parser("rename", help="Rename a dataset")
+    rename.add_argument("--new-name", type=str, required=True, help="The new name of the dataset(s)")
+    rename.add_argument(
+        "--project", type=str, required=True, help="The project the dataset(s) to be renamed belong(s) to"
+    )
+    rename.add_argument("--name", type=str, required=True, help="The name of the dataset(s) to be renamed")
+    rename.set_defaults(func=ds_rename)
+
+    move = subparsers.add_parser("move", help="Move a dataset to another project")
+    move.add_argument("--new-project", type=str, required=True, help="The new project of the dataset(s)")
+    move.add_argument(
+        "--project", type=str, required=True, help="The project the dataset(s) to be moved belong(s) to"
+    )
+    move.add_argument("--name", type=str, required=True, help="The name of the dataset(s) to be moved")
+    move.set_defaults(func=ds_move)
+
     compare = subparsers.add_parser('compare', help='Compare two datasets (target vs source)')
     compare.add_argument('--source', type=str, required=True, help='Source dataset id (used as baseline)')
     compare.add_argument('--target', type=str, required=True,
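Taken together, the three new management subcommands would be driven like this (project and dataset names are illustrative):

    # Delete every matching dataset version; --force is required alongside --entire-dataset
    clearml-data delete --project MyProject --name my_dataset --entire-dataset --force

    # Rename a dataset within its project
    clearml-data rename --project MyProject --name my_dataset --new-name my_dataset_v2

    # Move a dataset to a different project
    clearml-data move --project MyProject --name my_dataset --new-project OtherProject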
@@ -263,11 +315,54 @@ def cli():


 def ds_delete(args):
-    print('Deleting dataset id {}'.format(args.id))
     check_null_id(args)
+    if args.id:
+        print("Deleting dataset id {}".format(args.id))
+    else:
+        print("Deleting dataset with project={}, name={}, version={}".format(args.project, args.name, args.version))
     print_args(args)
-    Dataset.delete(dataset_id=args.id)
-    print('Dataset {} deleted'.format(args.id))
+    Dataset.delete(
+        dataset_id=args.id,
+        dataset_project=args.project,
+        dataset_name=args.name,
+        dataset_version=args.version,
+        entire_dataset=args.entire_dataset,
+        force=args.force,
+    )
+    print("Dataset(s) deleted")
     clear_state()
     return 0
+
+
+def ds_rename(args):
+    print(
+        "Renaming dataset with project={}, name={} to {}".format(
+            args.project, args.name, args.new_name
+        )
+    )
+    print_args(args)
+    Dataset.rename(
+        args.new_name,
+        dataset_project=args.project,
+        dataset_name=args.name,
+    )
+    print("Dataset(s) renamed")
+    clear_state()
+    return 0
+
+
+def ds_move(args):
+    print(
+        "Moving dataset with project={}, name={} to {}".format(
+            args.project, args.name, args.new_project
+        )
+    )
+    print_args(args)
+    Dataset.move_to_project(
+        args.new_project,
+        dataset_project=args.project,
+        dataset_name=args.name,
+    )
+    print("Dataset(s) moved")
+    clear_state()
+    return 0
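The handlers above are thin wrappers over the SDK entry points, so the same operations can be scripted directly (names are illustrative):

    from clearml import Dataset

    Dataset.rename("my_dataset_v2", dataset_project="MyProject", dataset_name="my_dataset")
    Dataset.move_to_project("OtherProject", dataset_project="MyProject", dataset_name="my_dataset_v2")
    Dataset.delete(dataset_project="OtherProject", dataset_name="my_dataset_v2", entire_dataset=True, force=True)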
@@ -533,6 +628,15 @@ def ds_create(args):
     return ds.id


+def ds_set_description(args):
+    check_null_id(args)
+    print("Setting description '{}' to dataset {}".format(args.description, args.id))
+    print_args(args)
+    ds = Dataset.get(dataset_id=args.id)
+    ds.set_description(args.description)
+    return 0
+
+
 def main():
     try:
         exit(cli())
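`ds_set_description` simply resolves the dataset and delegates to the instance method, so the programmatic equivalent is (id is a placeholder):

    from clearml import Dataset

    ds = Dataset.get(dataset_id="<dataset-id>")
    ds.set_description("Deduplicated training images")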
clearml/datasets/dataset.py:

@@ -18,7 +18,7 @@ from .. import Task, StorageManager, Logger
 from ..backend_api.session.client import APIClient
 from ..backend_api import Session
 from ..backend_interface.task.development.worker import DevWorker
-from ..backend_interface.util import mutually_exclusive, exact_match_regex, get_or_create_project
+from ..backend_interface.util import mutually_exclusive, exact_match_regex, get_or_create_project, rename_project
 from ..config import deferred_config, running_remotely, get_remote_task_id
 from ..debugging.log import LoggerRoot
 from ..storage.helper import StorageHelper
@@ -106,6 +106,7 @@ class Dataset(object):
     __dataset_folder_template = CacheManager.set_context_folder_lookup(__cache_context, "{0}_archive_{1}")
     __preview_max_file_entries = 15000
     __preview_max_size = 5 * 1024 * 1024
+    __min_api_version = "2.20"
     __hyperparams_section = "Datasets"
     __datasets_runtime_prop = "datasets"
     __orig_datasets_runtime_prop_prefix = "orig_datasets"
@@ -169,7 +170,7 @@ class Dataset(object):
             dataset_project, parent_project = self._build_hidden_project_name(dataset_project, dataset_name)
             task = Task.create(
                 project_name=dataset_project, task_name=dataset_name, task_type=Task.TaskTypes.data_processing)
-            if bool(Session.check_min_api_server_version("2.17")):
+            if bool(Session.check_min_api_server_version(Dataset.__min_api_version)):
                 get_or_create_project(task.session, project_name=parent_project, system_tags=[self.__hidden_tag])
                 get_or_create_project(
                     task.session,
@@ -184,10 +185,12 @@ class Dataset(object):
                 task.set_tags((task.get_tags() or []) + list(dataset_tags))
             task.mark_started()
             # generate the script section
-            script = \
-                'from clearml import Dataset\n\n' \
-                'ds = Dataset.create(dataset_project=\'{dataset_project}\', dataset_name=\'{dataset_name}\')\n'.format(
-                    dataset_project=dataset_project, dataset_name=dataset_name)
+            script = (
+                "from clearml import Dataset\n\n"
+                "ds = Dataset.create(dataset_project='{dataset_project}', dataset_name='{dataset_name}', dataset_version='{dataset_version}')\n".format(
+                    dataset_project=dataset_project, dataset_name=dataset_name, dataset_version=dataset_version
+                )
+            )
             task.data.script.diff = script
             task.data.script.working_dir = '.'
             task.data.script.entry_point = 'register_dataset.py'
@@ -223,12 +226,13 @@ class Dataset(object):
                         )
                     )
         runtime_props = {
-            "orig_dataset_name": self._task._get_runtime_properties().get("orig_dataset_name", self._task.name),
-            "orig_dataset_id": self._task._get_runtime_properties().get("orig_dataset_id", self._task.id),
+            "orig_dataset_name": self._task._get_runtime_properties().get("orig_dataset_name", self._task.name),  # noqa
+            "orig_dataset_id": self._task._get_runtime_properties().get("orig_dataset_id", self._task.id),  # noqa
         }
         if not self._dataset_version:
             self._dataset_version = self.__default_dataset_version
         runtime_props["version"] = self._dataset_version
+        # noinspection PyProtectedMember
         self._task.set_user_properties(version=self._dataset_version)
         # noinspection PyProtectedMember
         self._task._set_runtime_properties(runtime_props)
@@ -246,6 +250,19 @@ class Dataset(object):
         self._dependency_chunk_lookup = None  # type: Optional[Dict[str, int]]
         self._ds_total_size = None
         self._ds_total_size_compressed = None
+        (
+            self.__preview_tables_count,
+            self.__preview_image_count,
+            self.__preview_video_count,
+            self.__preview_audio_count,
+            self.__preview_html_count,
+        ) = (
+            0,
+            0,
+            0,
+            0,
+            0,
+        )

     @property
     def id(self):
@@ -288,6 +305,8 @@ class Dataset(object):
     @property
     def name(self):
         # type: () -> str
+        if bool(Session.check_min_api_server_version(Dataset.__min_api_version)):
+            return self._task.get_project_name().partition("/.datasets/")[-1]
         return self._task.name

     @property
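On servers that pass the version gate, the dataset name is now recovered from the hidden project path instead of the task name. Since `str.partition` splits on the first occurrence and `[-1]` takes the tail, a project such as `MyProject/.datasets/my_dataset` yields the bare dataset name:

    "MyProject/.datasets/my_dataset".partition("/.datasets/")[-1]  # -> 'my_dataset'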
@@ -601,6 +620,8 @@ class Dataset(object):
         :param max_workers: Number of threads to be spawned when zipping and uploading the files.
             Defaults to the number of logical cores.
         """
+        self._report_dataset_preview()
+
         if not max_workers:
             max_workers = psutil.cpu_count()

@@ -727,7 +748,6 @@ class Dataset(object):
         self._add_script_call('finalize')
         if verbose:
             print('Updating statistics and genealogy')
-        self._report_dataset_preview()
         self._report_dataset_struct()
         self._report_dataset_genealogy()
         if self._using_current_task:
@@ -1151,15 +1171,16 @@ class Dataset(object):
         instance._task.get_logger().report_text(
             "ClearML results page: {}".format(instance._task.get_output_log_web_page())
         )
-        instance._task.get_logger().report_text(
-            "ClearML dataset page: {}".format(
-                "{}/datasets/simple/{}/experiments/{}".format(
-                    instance._task._get_app_server(),
-                    instance._task.project if instance._task.project is not None else "*",
-                    instance._task.id,
-                )
-            )
-        )
+        if bool(Session.check_min_api_server_version(cls.__min_api_version)):
+            instance._task.get_logger().report_text(
+                "ClearML dataset page: {}".format(
+                    "{}/datasets/simple/{}/experiments/{}".format(
+                        instance._task._get_app_server(),
+                        instance._task.project if instance._task.project is not None else "*",
+                        instance._task.id,
+                    )
+                )
+            )
         # noinspection PyProtectedMember
         instance._task.flush(wait_for_uploads=True)
         # noinspection PyProtectedMember
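For reference, the gated log line above renders a URL of this shape (host shown for the hosted server as an illustration; project and task ids are placeholders):

    https://app.clear.ml/datasets/simple/<project_id>/experiments/<dataset_task_id>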
@@ -1266,8 +1287,8 @@ class Dataset(object):
         raise an Exception or move the entire dataset if `entire_dataset` is True and `force` is True

         :param dataset_id: The ID of the dataset(s) to be deleted
-        :param dataset_project: The project the dataset(s) to be deletedd belongs to
-        :param dataset_name: The name of the dataset(s) (before renaming)
+        :param dataset_project: The project the dataset(s) to be deleted belong(s) to
+        :param dataset_name: The name of the dataset(s) to be deleted
         :param force: If True, delete the dataset(s) even when being used. Also required to be set to
             True when `entire_dataset` is set.
         :param dataset_version: The version of the dataset(s) to be deleted
@@ -1318,55 +1339,31 @@ class Dataset(object):
     @classmethod
     def rename(
         cls,
-        new_name,  # str
-        dataset_id=None,  # Optional[str]
-        dataset_project=None,  # Optional[str]
-        dataset_name=None,  # Optional[str]
-        dataset_version=None,  # Optional[str]
-        entire_dataset=False,  # bool
-        force=False,  # bool
+        new_dataset_name,  # str
+        dataset_project,  # str
+        dataset_name,  # str
     ):
         # type: (...) -> ()
         """
-        Rename the dataset(s). If multiple datasets match the parameters,
-        raise an Exception or move the entire dataset if `entire_dataset` is True and `force` is True
+        Rename the dataset.

-        :param new_name: The new name of the dataset(s) to be renamed
-        :param dataset_id: The ID of the dataset(s) to be rename
-        :param dataset_project: The project the dataset(s) to be renamed belongs to
-        :param dataset_name: The name of the dataset(s) (before renaming)
-        :param dataset_version: The version of the dataset(s) to be renamed
-        :param entire_dataset: If True, rename all all datasets that match the given `dataset_project`,
-            `dataset_name`, `dataset_version`. Note that `force` has to be True if this paramer is True
-        :param force: If True, rename the dataset(s) even when being used. Also required to be set to
-            True when `entire_dataset` is set.
+        :param new_dataset_name: The new name of the datasets to be renamed
+        :param dataset_project: The project the datasets to be renamed belongs to
+        :param dataset_name: The name of the datasets (before renaming)
         """
-        if not any([dataset_id, dataset_project, dataset_name]):
-            raise ValueError("Dataset rename criteria not met. Didn't provide id/name/project correctly.")
-
-        mutually_exclusive(dataset_id=dataset_id, dataset_project=dataset_project)
-        mutually_exclusive(dataset_id=dataset_id, dataset_name=dataset_name)
-
-        # noinspection PyBroadException
-        try:
-            dataset_ids = cls._get_dataset_ids_respecting_params(
-                dataset_id=dataset_id,
-                dataset_project=dataset_project,
-                dataset_name=dataset_name,
-                force=force,
-                dataset_version=dataset_version,
-                entire_dataset=entire_dataset,
-                action="rename",
+        if not bool(Session.check_min_api_server_version(cls.__min_api_version)):
+            LoggerRoot.get_base_logger().warning(
+                "Could not rename dataset because API version < {}".format(cls.__min_api_version)
             )
-        except Exception as e:
-            LoggerRoot.get_base_logger().warning("Error: {}".format(str(e)))
             return
-        for dataset_id in dataset_ids:
-            task = Task.get_task(task_id=dataset_id)
-            if not task.rename(new_name):
-                LoggerRoot.get_base_logger().warning("Could not rename dataset with ID {}".format(dataset_id))
-                continue
-            cls._move_to_project_aux(task, task.get_project_name(), new_name)
+        project, _ = cls._build_hidden_project_name(dataset_project, dataset_name)
+        new_project, _ = cls._build_hidden_project_name(dataset_project, new_dataset_name)
+        # noinspection PyProtectedMember
+        result = rename_project(Task._get_default_session(), project, new_project)
+        if not result:
+            LoggerRoot.get_base_logger().warning(
+                "Could not rename dataset with dataset_project={} dataset_name={}".format(dataset_project, dataset_name)
+            )

     @classmethod
     def _move_to_project_aux(cls, task, new_project, dataset_name):
@@ -1387,45 +1384,31 @@ class Dataset(object):
     @classmethod
     def move_to_project(
         cls,
-        new_project,  # str
-        dataset_id=None,  # Optional[str]
-        dataset_project=None,  # Optional[str]
-        dataset_name=None,  # Optional[str]
-        dataset_version=None,  # Optional[str]
-        entire_dataset=False,  # bool
-        force=False,  # bool
+        new_dataset_project,  # str
+        dataset_project,  # str
+        dataset_name,  # str
    ):
         # type: (...) -> ()
         """
-        Move the dataset(s) to a another project. If multiple datasets match the parameters,
-        raise an Exception or move the entire dataset if `entire_dataset` is True and `force` is True
+        Move the dataset to another project.

-        :param new_project: New project to move the dataset(s) to
-        :param dataset_id: ID of the datasets(s) to move to new project
+        :param new_dataset_project: New project to move the dataset(s) to
         :param dataset_project: Project of the dataset(s) to move to new project
         :param dataset_name: Name of the dataset(s) to move to new project
-        :param dataset_version: Version of the dataset(s) to move to new project
-        :param entire_dataset: If True, move  all datasets that match the given `dataset_project`,
-            `dataset_name`, `dataset_version`. Note that `force` has to be True if this paramer is True
-        :param force: If True, move the dataset(s) even when being used. Also required to be set to
-            True when `entire_dataset` is set.
         """
-        if not any([dataset_id, dataset_project, dataset_name]):
-            raise ValueError("Dataset move criteria not met. Didn't provide id/name/project correctly.")
-
-        mutually_exclusive(dataset_id=dataset_id, dataset_project=dataset_project)
-        mutually_exclusive(dataset_id=dataset_id, dataset_name=dataset_name)
-
+        if not bool(Session.check_min_api_server_version(cls.__min_api_version)):
+            LoggerRoot.get_base_logger().warning(
+                "Could not move dataset to another project because API version < {}".format(cls.__min_api_version)
+            )
+            return
         # noinspection PyBroadException
         try:
             dataset_ids = cls._get_dataset_ids_respecting_params(
-                dataset_id=dataset_id,
                 dataset_project=dataset_project,
                 dataset_name=dataset_name,
-                force=force,
-                dataset_version=dataset_version,
-                entire_dataset=entire_dataset,
-                action="move to project",
+                entire_dataset=True,
+                force=True,
+                action="move",
             )
         except Exception as e:
             LoggerRoot.get_base_logger().warning("Error: {}".format(str(e)))
@@ -1439,12 +1422,7 @@ class Dataset(object):
             if not dataset:
                 LoggerRoot.get_base_logger().warning("Could not find dataset to move to another project")
                 continue
-            if not bool(Session.check_min_api_server_version("2.17")):
-                LoggerRoot.get_base_logger().warning(
-                    "Could not move dataset to another project because API version < 2.17"
-                )
-                continue
-            cls._move_to_project_aux(dataset._task, new_project, dataset.name)
+            cls._move_to_project_aux(dataset._task, new_dataset_project, dataset.name)

     @classmethod
     def get(
@@ -2675,13 +2653,6 @@ class Dataset(object):

         compression_extensions = {".gz", ".bz2", ".zip", ".xz", ".zst"}
         tabular_extensions = {".csv", ".parquet", ".parq", ".npz", ".npy"}
-        preview_tables_count, preview_image_count, preview_video_count, preview_audio_count, preview_html_count = (
-            0,
-            0,
-            0,
-            0,
-            0,
-        )
         for file in self._dataset_file_entries.values():
             if file.local_path:
                 file_path = file.local_path
@@ -2695,7 +2666,7 @@ class Dataset(object):
             if file_extension in compression_extensions:
                 compression = file_extension
                 _, file_extension = os.path.splitext(file_path[: -len(file_extension)])
-            if file_extension in tabular_extensions and preview_tables_count >= self.__preview_tabular_table_count:
+            if file_extension in tabular_extensions and self.__preview_tables_count >= self.__preview_tabular_table_count:
                 continue
             artifact = convert_to_tabular_artifact(file_path, file_extension, compression)
             if artifact is not None:
@@ -2704,7 +2675,7 @@ class Dataset(object):
                     self._task.get_logger().report_media(
                         "Tables", file_name, stream=artifact.to_csv(index=False), file_extension=".txt"
                     )
-                    preview_tables_count += 1
+                    self.__preview_tables_count += 1
                 except Exception:
                     pass
                 continue
@@ -2714,18 +2685,18 @@ class Dataset(object):
             if not guessed_type or not guessed_type[0]:
                 continue
             guessed_type = guessed_type[0]
-            if guessed_type.startswith("image") and preview_image_count < self.__preview_media_image_count:
+            if guessed_type.startswith("image") and self.__preview_image_count < self.__preview_media_image_count:
                 self._task.get_logger().report_media("Images", file_name, local_path=file_path)
-                preview_image_count += 1
-            elif guessed_type.startswith("video") and preview_video_count < self.__preview_media_video_count:
+                self.__preview_image_count += 1
+            elif guessed_type.startswith("video") and self.__preview_video_count < self.__preview_media_video_count:
                 self._task.get_logger().report_media("Videos", file_name, local_path=file_path)
-                preview_video_count += 1
-            elif guessed_type.startswith("audio") and preview_audio_count < self.__preview_media_audio_count:
+                self.__preview_video_count += 1
+            elif guessed_type.startswith("audio") and self.__preview_audio_count < self.__preview_media_audio_count:
                 self._task.get_logger().report_media("Audio", file_name, local_path=file_path)
-                preview_audio_count += 1
-            elif guessed_type == "text/html" and preview_html_count < self.__preview_media_html_count:
+                self.__preview_audio_count += 1
+            elif guessed_type == "text/html" and self.__preview_html_count < self.__preview_media_html_count:
                 self._task.get_logger().report_media("HTML", file_name, local_path=file_path)
-                preview_html_count += 1
+                self.__preview_html_count += 1

     @classmethod
     def _set_project_system_tags(cls, task):
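The media branches above key off the first element of `guessed_type`, which presumably comes from something like Python's standard `mimetypes.guess_type` (an assumption; the call site is outside this hunk):

    import mimetypes

    mimetypes.guess_type("photo.jpg")  # -> ('image/jpeg', None): counts toward the image preview quota
    mimetypes.guess_type("clip.mp4")   # -> ('video/mp4', None): counts toward the video preview quota
    mimetypes.guess_type("page.html")  # -> ('text/html', None): matched exactly, not via startswith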
@@ -3005,7 +2976,7 @@ class Dataset(object):
             is the parent project
         """
         dataset_project = cls._remove_hidden_part_from_dataset_project(dataset_project)
-        if bool(Session.check_min_api_server_version("2.17")):
+        if bool(Session.check_min_api_server_version(cls.__min_api_version)):
             parent_project = "{}.datasets".format(dataset_project + "/" if dataset_project else "")
             project_name = "{}/{}".format(parent_project, dataset_name)
         else:
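So, for example, `dataset_project="MyProject"` and `dataset_name="my_dataset"` on a new enough API server produce (values follow directly from the format strings above):

    parent_project = "MyProject/.datasets"
    project_name = "MyProject/.datasets/my_dataset"

which is exactly the hidden path that the updated `name` property strips apart and that `rename()` rebuilds for the project rename.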