Mirror of https://github.com/clearml/clearml (synced 2025-04-24 00:07:48 +00:00)
Fix Dataset.list_datasets() returns an empty list
parent fb644fe9ec
commit c0bbab75b8
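
In short (as reconstructed from the hunks below): Dataset.list_datasets() could return an empty list because the underlying task query matched only the exact project name and funneled the project lookup through a single-result helper, so datasets living in subprojects and in hidden `.datasets` subprojects were missed. The fix matches a list of project regexes, keeps every matching project id, batches project-name resolution into paginated GetAllRequest calls via a new Task._get_project_names() helper, exposes only_completed and recursive_project_search through `clearml-data search`, and, along the way, sizes the CLI table columns to fit their content.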
@@ -2435,6 +2435,26 @@ class Task(IdObjectBase, AccessMixin, SetupUploadMixin):
             return None
         return res.response.project.name
 
+    @classmethod
+    def _get_project_names(cls, project_ids):
+        # type: (Sequence[str]) -> Dict[str, str]
+        page = -1
+        page_size = 500
+        all_responses = []
+        res = None
+        while True:
+            page += 1
+            res = cls._send(
+                cls._get_default_session(),
+                projects.GetAllRequest(id=list(project_ids), page=page, page_size=page_size),
+                raise_on_errors=False,
+            )
+            if res and res.response and res.response.projects:
+                all_responses.extend(res.response.projects)
+            else:
+                break
+        return {p.id: p.name for p in all_responses}
+
     def _get_all_events(
         self, max_events=100, batch_size=500, order='asc', event_type=None, unique_selector=itemgetter("url")
     ):
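
The new helper replaces one request per project id with paginated GetAllRequest calls that stop on the first empty page. A minimal, self-contained sketch of the same pagination idiom (not ClearML code; fetch_page is a stand-in for the backend call):

def paginate(fetch_page, page_size=500):
    """Collect results page by page until the backend returns an empty page."""
    results = []
    page = 0
    while True:
        batch = fetch_page(page=page, page_size=page_size)
        if not batch:
            break
        results.extend(batch)
        page += 1
    return results

# usage with a fake backend of 1200 items:
data = list(range(1200))
fetch = lambda page, page_size: data[page * page_size:(page + 1) * page_size]
assert paginate(fetch) == data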
@@ -270,11 +270,20 @@ def cli():
                         help='Verbose report all file changes (instead of summary)')
     squash.set_defaults(func=ds_squash)
 
-    search = subparsers.add_parser('search', help='Search datasets in the system (sorted by creation time)')
-    search.add_argument('--ids', type=str, nargs='*', help='Specify list of dataset IDs')
-    search.add_argument('--project', type=str, help='Specify datasets project name')
-    search.add_argument('--name', type=str, help='Specify datasets partial name matching')
-    search.add_argument('--tags', type=str, nargs='*', help='Specify list of dataset user tags')
+    search = subparsers.add_parser("search", help="Search datasets in the system (sorted by creation time)")
+    search.add_argument("--ids", type=str, nargs="*", help="Specify list of dataset IDs")
+    search.add_argument("--project", type=str, help="Specify datasets project name")
+    search.add_argument("--name", type=str, help="Specify datasets partial name matching")
+    search.add_argument("--tags", type=str, nargs="*", help="Specify list of dataset user tags")
+    search.add_argument(
+        "--not-only-completed",
+        action="store_true",
+        default=False,
+        help="If set, return datasets that are still in progress as well",
+    )
+    search.add_argument(
+        "--non-recursive-project-search", action="store_true", default=False, help="Don't search inside subprojects"
+    )
     search.set_defaults(func=ds_search)
 
     verify = subparsers.add_parser('verify', help='Verify local dataset content')
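
For reference, the two new flags feed straight into the new Dataset.list_datasets() keyword arguments (see the dataset.py hunks further down). A call like the following sketch should match `clearml-data search --project my-project --not-only-completed --non-recursive-project-search` (the project name is hypothetical):

from clearml import Dataset

datasets = Dataset.list_datasets(
    dataset_project="my-project",     # hypothetical project name
    only_completed=False,             # --not-only-completed
    recursive_project_search=False,   # --non-recursive-project-search
)
for d in datasets:
    print(d["id"], d["name"])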
@@ -446,17 +455,28 @@ def ds_list(args):
         dataset_name=args.name or None,
         dataset_version=args.version,
     )
-    print('Listing dataset content')
-    formatting = '{:64} | {:10,} | {:64}'
-    print(formatting.replace(',', '').format('file name', 'size', 'hash'))
-    print('-' * len(formatting.replace(',', '').format('-', '-', '-')))
     filters = args.filter if args.filter else [None]
     file_entries = ds.file_entries_dict
     link_entries = ds.link_entries_dict
-    num_files = 0
-    total_size = 0
+    file_name_max_len, size_max_len, hash_max_len = 64, 10, 64
+    files_cache = []
     for mask in filters:
         files = ds.list_files(dataset_path=mask, dataset_id=ds.id if args.modified else None)
+        files_cache.append(files)
+        for f in files:
+            e = link_entries.get(f)
+            if file_entries.get(f):
+                e = file_entries[f]
+            file_name_max_len = max(file_name_max_len, len(e.relative_path))
+            size_max_len = max(size_max_len, len(str(e.size)))
+            hash_max_len = max(hash_max_len, len(str(e.hash)))
+    print('Listing dataset content')
+    formatting = "{:" + str(file_name_max_len) + "} | {:" + str(size_max_len) + ",} | {:" + str(hash_max_len) + "}"
+    print(formatting.replace(",", "").format("file name", "size", "hash"))
+    print("-" * len(formatting.replace(",", "").format("-", "-", "-")))
+    num_files = 0
+    total_size = 0
+    for files in files_cache:
         num_files += len(files)
         for f in files:
             e = link_entries.get(f)
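
The rewrite trades the fixed 64/10/64 column widths for a first pass that measures every entry and a second pass that prints. A standalone sketch of that two-pass layout (toy data, not ClearML code):

rows = [("a.txt", 1234, "abc123"), ("a_very_long_file_name.bin", 56, "deadbeef")]
name_w, size_w, hash_w = len("file name"), len("size"), len("hash")
for name, size, digest in rows:                     # pass 1: measure
    name_w = max(name_w, len(name))
    size_w = max(size_w, len(str(size)))
    hash_w = max(hash_w, len(digest))
fmt = "{:" + str(name_w) + "} | {:" + str(size_w) + ",} | {:" + str(hash_w) + "}"
print(fmt.replace(",", "").format("file name", "size", "hash"))
print("-" * len(fmt.replace(",", "").format("-", "-", "-")))
for name, size, digest in rows:                     # pass 2: print
    print(fmt.format(name, size, digest))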
@@ -480,15 +500,41 @@ def ds_search(args):
     print('Search datasets')
     print_args(args)
     datasets = Dataset.list_datasets(
-        dataset_project=args.project or None, partial_name=args.name or None,
-        tags=args.tags or None, ids=args.ids or None
+        dataset_project=args.project or None,
+        partial_name=args.name or None,
+        tags=args.tags or None,
+        ids=args.ids or None,
+        only_completed=not args.not_only_completed,
+        recursive_project_search=not args.non_recursive_project_search,
     )
-    formatting = '{:16} | {:32} | {:19} | {:19} | {:32}'
-    print(formatting.format('project', 'name', 'tags', 'created', 'id'))
-    print('-' * len(formatting.format('-', '-', '-', '-', '-')))
+    projects_col_len, name_col_len, tags_col_len, created_col_len, id_col_len = 16, 32, 19, 19, 32
     for d in datasets:
-        print(formatting.format(
-            d['project'], d['name'], str(d['tags'] or [])[1:-1], str(d['created']).split('.')[0], d['id']))
+        projects_col_len = max(projects_col_len, len(d["project"]))
+        name_col_len = max(name_col_len, len(d["name"]))
+        tags_col_len = max(tags_col_len, len(str(d["tags"] or [])[1:-1]))
+        created_col_len = max(created_col_len, len(str(d["created"]).split(".")[0]))
+        id_col_len = max(id_col_len, len(d["id"]))
+    formatting = (
+        "{:"
+        + str(projects_col_len)
+        + "} | {:"
+        + str(name_col_len)
+        + "} | {:"
+        + str(tags_col_len)
+        + "} | {:"
+        + str(created_col_len)
+        + "} | {:"
+        + str(id_col_len)
+        + "}"
+    )
+    print(formatting.format("project", "name", "tags", "created", "id"))
+    print("-" * len(formatting.format("-", "-", "-", "-", "-")))
+    for d in datasets:
+        print(
+            formatting.format(
+                d["project"], d["name"], str(d["tags"] or [])[1:-1], str(d["created"]).split(".")[0], d["id"]
+            )
+        )
     return 0
 
 
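One detail worth noting in both the old and new print loops: the tags cell is rendered with str(d["tags"] or [])[1:-1], i.e. the list repr with its surrounding brackets sliced off. A quick illustration:

print(str(["cats", "v2"] or [])[1:-1])  # -> 'cats', 'v2'
print(str(None or [])[1:-1])            # -> empty string when there are no tags
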
@@ -4,6 +4,7 @@ import os
 import shutil
 import psutil
 import mimetypes
+import re
 from copy import deepcopy, copy
 from multiprocessing.pool import ThreadPool
 from concurrent.futures import ThreadPoolExecutor
@@ -1736,8 +1737,16 @@ class Dataset(object):
         return squashed_ds
 
     @classmethod
-    def list_datasets(cls, dataset_project=None, partial_name=None, tags=None, ids=None, only_completed=True):
-        # type: (Optional[str], Optional[str], Optional[Sequence[str]], Optional[Sequence[str]], bool) -> List[dict]
+    def list_datasets(
+        cls,
+        dataset_project=None,
+        partial_name=None,
+        tags=None,
+        ids=None,
+        only_completed=True,
+        recursive_project_search=True,
+    ):
+        # type: (Optional[str], Optional[str], Optional[Sequence[str]], Optional[Sequence[str]], bool, bool) -> List[dict]
         """
         Query list of dataset in the system
 
@@ -1746,30 +1755,47 @@ class Dataset(object):
         :param tags: Specify user tags
         :param ids: List specific dataset based on IDs list
         :param only_completed: If False return dataset that are still in progress (uploading/edited etc.)
+        :param recursive_project_search: If True and the `dataset_project` argument is set,
+            search inside subprojects as well.
+            If False, don't search inside subprojects (except for the special `.datasets` subproject)
         :return: List of dictionaries with dataset information
             Example: [{'name': name, 'project': project name, 'id': dataset_id, 'created': date_created},]
         """
+        if dataset_project:
+            if not recursive_project_search:
+                dataset_projects = [
+                    exact_match_regex(dataset_project),
+                    "^{}/\\.datasets/.*".format(re.escape(dataset_project)),
+                ]
+            else:
+                dataset_projects = [exact_match_regex(dataset_project), "^{}/.*".format(re.escape(dataset_project))]
+        else:
+            dataset_projects = None
         # noinspection PyProtectedMember
         datasets = Task._query_tasks(
-            task_ids=ids or None, project_name=dataset_project or None,
+            task_ids=ids or None,
+            project_name=dataset_projects,
             task_name=partial_name,
             system_tags=[cls.__tag],
             type=[str(Task.TaskTypes.data_processing)],
             tags=tags or None,
-            status=['stopped', 'published', 'completed', 'closed'] if only_completed else None,
-            only_fields=['created', 'id', 'name', 'project', 'tags'],
+            status=["stopped", "published", "completed", "closed"] if only_completed else None,
+            only_fields=["created", "id", "name", "project", "tags"],
             search_hidden=True,
-            _allow_extra_fields_=True
+            exact_match_regex_flag=False,
+            _allow_extra_fields_=True,
         )
         project_ids = {d.project for d in datasets}
         # noinspection PyProtectedMember
-        project_id_lookup = {d: Task._get_project_name(d) for d in project_ids}
+        project_id_lookup = Task._get_project_names(project_ids)
         return [
-            {'name': d.name,
-             'created': d.created,
-             'project': project_id_lookup[d.project],
-             'id': d.id,
-             'tags': d.tags}
+            {
+                "name": d.name,
+                "created": d.created,
+                "project": cls._remove_hidden_part_from_dataset_project(project_id_lookup[d.project]),
+                "id": d.id,
+                "tags": d.tags,
+            }
             for d in datasets
         ]
 
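The recursive/non-recursive split comes down to which regexes get matched against project paths. A rough illustration (exact_match_regex() is assumed here to anchor the escaped name, which is what the diff's usage implies):

import re

def exact_match_regex(name):
    # assumed behavior of clearml's exact_match_regex() helper
    return "^{}$".format(re.escape(name))

project = "my-project"
recursive = [exact_match_regex(project), "^{}/.*".format(re.escape(project))]
non_recursive = [exact_match_regex(project), "^{}/\\.datasets/.*".format(re.escape(project))]

for candidate in ("my-project", "my-project/sub", "my-project/.datasets/v1", "other"):
    print(candidate,
          "| recursive:", any(re.match(p, candidate) for p in recursive),
          "| non-recursive:", any(re.match(p, candidate) for p in non_recursive))

Note that even in non-recursive mode the hidden `.datasets` subproject still matches, which is what the docstring above promises.
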
@@ -3938,7 +3938,15 @@ class Task(_Task):
         return [cls(private=cls.__create_protection, task_id=task.id, log_to_backend=False) for task in queried_tasks]
 
     @classmethod
-    def _query_tasks(cls, task_ids=None, project_name=None, task_name=None, fetch_only_first_page=False, **kwargs):
+    def _query_tasks(
+        cls,
+        task_ids=None,
+        project_name=None,
+        task_name=None,
+        fetch_only_first_page=False,
+        exact_match_regex_flag=True,
+        **kwargs
+    ):
         res = None
         if not task_ids:
             task_ids = None
@@ -3960,13 +3968,12 @@ class Task(_Task):
             res = cls._send(
                 cls._get_default_session(),
                 projects.GetAllRequest(
-                    name=exact_match_regex(name),
+                    name=exact_match_regex(name) if exact_match_regex_flag else name,
                     **aux_kwargs
                 )
             )
-            project = get_single_result(entity='project', query=name, results=res.response.projects)
-            if project:
-                project_ids.append(project.id)
+            if res.response and res.response.projects:
+                project_ids.extend([project.id for project in res.response.projects])
 
         session = cls._get_default_session()
         system_tags = 'system_tags' if hasattr(tasks.Task, 'system_tags') else 'tags'
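
This last hunk is the crux of the empty-list fix: the old code funneled the response through get_single_result(), so a query that legitimately matched several projects (a project plus its subprojects and hidden `.datasets` projects) kept at most one project id. A toy sketch of the behavioral difference (stand-in objects, not ClearML types; exact get_single_result() semantics aside):

class Proj:
    def __init__(self, id):
        self.id = id

matches = [Proj("p1"), Proj("p2"), Proj("p3")]  # project + subproject + hidden .datasets project

# before (roughly): a single-result helper kept one project at most
old_ids = [matches[0].id] if matches else []

# after: every matching project id survives, so datasets in subprojects are found
new_ids = [p.id for p in matches]

print(old_ids, "->", new_ids)   # ['p1'] -> ['p1', 'p2', 'p3']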