Add data_tool export improvements: a 'company' flag, a larger event batch size for performance, date-time prefixes on log strings, more logging, an option to create a separate zip file per root project, and an option to translate URLs during export

This commit is contained in:
clearml 2025-06-04 11:43:31 +03:00
parent bf00441146
commit a7e340212f
3 changed files with 219 additions and 121 deletions

View File

@@ -65,6 +65,14 @@ from apiserver.utilities.parameter_key_escaper import ParameterKeyEscaper
 replace_s3_scheme = os.getenv("CLEARML_REPLACE_S3_SCHEME")
 
 
+def _print(msg: str):
+    time = datetime.now().isoformat(sep=" ", timespec="seconds")
+    print(f"{time} {msg}")
+
+
+UrlTranslation = Tuple[str, str]
+
+
 class PrePopulate:
     module_name_prefix = "apiserver."
     event_bll = EventBLL()
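
The new `_print` helper is the mechanism behind the "date-time to log strings" item in the commit subject: every log line now carries a second-resolution ISO timestamp. A minimal standalone sketch of the behavior (the sample timestamp is illustrative):

    from datetime import datetime

    def _print(msg: str):
        time = datetime.now().isoformat(sep=" ", timespec="seconds")
        print(f"{time} {msg}")

    _print("Reading projects...")
    # prints e.g.: 2025-06-04 11:43:31 Reading projects...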
@@ -163,7 +171,7 @@ class PrePopulate:
                 return True, files
             except Exception as ex:
-                print("Error reading map file. " + str(ex))
+                _print("Error reading map file. " + str(ex))
                 return True, files
 
         return False, files
@@ -204,7 +212,7 @@ class PrePopulate:
             return False
 
         fileserver_links = [a for a in artifacts if is_fileserver_link(a)]
-        print(
+        _print(
             f"Found {len(fileserver_links)} files on the fileserver from {len(artifacts)} total"
         )
@@ -216,81 +224,114 @@ class PrePopulate:
         filename: str,
         experiments: Sequence[str] = None,
         projects: Sequence[str] = None,
+        company: str = None,
         artifacts_path: str = None,
         task_statuses: Sequence[str] = None,
         tag_exported_entities: bool = False,
         metadata: Mapping[str, Any] = None,
         export_events: bool = True,
         export_users: bool = False,
+        project_split: bool = False,
+        url_trans: UrlTranslation = None,
     ) -> Sequence[str]:
         cls._init_entity_types()
 
         if task_statuses and not set(task_statuses).issubset(get_options(TaskStatus)):
             raise ValueError("Invalid task statuses")
 
-        file = Path(filename)
-        if not (experiments or projects):
-            projects = cls.project_cls.objects(parent=None).scalar("id")
-
-        entities = cls._resolve_entities(
-            experiments=experiments, projects=projects, task_statuses=task_statuses
-        )
-
-        hash_ = hashlib.md5()
-        if metadata:
-            meta_str = json.dumps(metadata)
-            hash_.update(meta_str.encode())
-            metadata_hash = hash_.hexdigest()
-        else:
-            meta_str, metadata_hash = "", ""
-
-        map_file = file.with_suffix(".map")
-        updated, old_files = cls._check_for_update(
-            map_file, entities=entities, metadata_hash=metadata_hash
-        )
-        if not updated:
-            print(f"There are no updates from the last export")
-            return old_files
-
-        for old in old_files:
-            old_path = Path(old)
-            if old_path.is_file():
-                old_path.unlink()
-
-        with ZipFile(file, **cls.zip_args) as zfile:
-            if metadata:
-                zfile.writestr(cls.metadata_filename, meta_str)
-            if export_users:
-                cls._export_users(zfile)
-            artifacts = cls._export(
-                zfile,
-                entities=entities,
-                hash_=hash_,
-                tag_entities=tag_exported_entities,
-                export_events=export_events,
-                cleanup_users=not export_users,
-            )
-
-        file_with_hash = file.with_name(f"{file.stem}_{hash_.hexdigest()}{file.suffix}")
-        file.replace(file_with_hash)
-        created_files = [str(file_with_hash)]
-
-        artifacts = cls._filter_artifacts(artifacts)
-        if artifacts and artifacts_path and os.path.isdir(artifacts_path):
-            artifacts_file = file_with_hash.with_suffix(cls.artifacts_ext)
-            with ZipFile(artifacts_file, **cls.zip_args) as zfile:
-                cls._export_artifacts(zfile, artifacts, artifacts_path)
-            created_files.append(str(artifacts_file))
-
-        cls._write_update_file(
-            map_file,
-            entities=entities,
-            created_files=created_files,
-            metadata_hash=metadata_hash,
-        )
-
-        if created_files:
-            print("Created files:\n" + "\n".join(file for file in created_files))
+        def export_to_zip_core(file_base_name: Path, projects_: Sequence[str]):
+            entities = cls._resolve_entities(
+                experiments=experiments, projects=projects_, task_statuses=task_statuses
+            )
+
+            hash_ = hashlib.md5()
+            if metadata:
+                meta_str = json.dumps(metadata)
+                hash_.update(meta_str.encode())
+                metadata_hash = hash_.hexdigest()
+            else:
+                meta_str, metadata_hash = "", ""
+
+            map_file = file_base_name.with_suffix(".map")
+            updated, old_files = cls._check_for_update(
+                map_file, entities=entities, metadata_hash=metadata_hash
+            )
+            if not updated:
+                _print(f"There are no updates from the last export")
+                return old_files
+
+            for old in old_files:
+                old_path = Path(old)
+                if old_path.is_file():
+                    old_path.unlink()
+
+            temp_file = file_base_name.with_suffix(file_base_name.suffix + "$")
+            try:
+                with ZipFile(temp_file, **cls.zip_args) as zfile:
+                    if metadata:
+                        zfile.writestr(cls.metadata_filename, meta_str)
+                    if export_users:
+                        cls._export_users(zfile)
+                    artifacts = cls._export(
+                        zfile,
+                        entities=entities,
+                        hash_=hash_,
+                        tag_entities=tag_exported_entities,
+                        export_events=export_events,
+                        cleanup_users=not export_users,
+                        url_trans=url_trans,
+                    )
+            except:
+                temp_file.unlink(missing_ok=True)
+                raise
+
+            file_with_hash = file_base_name.with_stem(
+                f"{file_base_name.stem}_{hash_.hexdigest()}"
+            )
+            temp_file.replace(file_with_hash)
+            files = [str(file_with_hash)]
+
+            artifacts = cls._filter_artifacts(artifacts)
+            if artifacts and artifacts_path and os.path.isdir(artifacts_path):
+                artifacts_file = file_with_hash.with_suffix(cls.artifacts_ext)
+                with ZipFile(artifacts_file, **cls.zip_args) as zfile:
+                    cls._export_artifacts(zfile, artifacts, artifacts_path)
+                files.append(str(artifacts_file))
+
+            cls._write_update_file(
+                map_file,
+                entities=entities,
+                created_files=files,
+                metadata_hash=metadata_hash,
+            )
+
+            if files:
+                _print("Created files:\n" + "\n".join(file for file in files))
+
+            return files
+
+        filename = Path(filename)
+        if not (experiments or projects):
+            query = dict(parent=None)
+            if company:
+                query["company"] = company
+            projects = list(cls.project_cls.objects(**query).scalar("id"))
+            # projects.append(None)
+
+        if projects and project_split:
+            created_files = list(
+                chain.from_iterable(
+                    export_to_zip_core(
+                        file_base_name=filename.with_stem(f"{filename.stem}_{pid}"),
+                        projects_=[pid],
+                    )
+                    for pid in projects
+                )
+            )
+        else:
+            created_files = export_to_zip_core(
+                file_base_name=filename, projects_=projects
+            )
 
         return created_files
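
With the old method body extracted into the nested `export_to_zip_core`, a caller can now get one zip per root project and rewrite stored URLs in a single pass. A hypothetical invocation, assuming the enclosing classmethod is named `export_to_zip` (as the nested `export_to_zip_core` suggests); the id and hosts are made up:

    created_files = PrePopulate.export_to_zip(
        "/backup/export.zip",
        company="d1bd92a3b039400cbafc60a7a5b1e52b",  # restrict root projects to one company
        project_split=True,  # one zip (and one .map file) per root project
        url_trans=(
            "https://old-files.example.com/",  # rewrite URLs with this prefix...
            "https://new-files.example.com/",  # ...to point at this one
        ),
    )
    # With project_split=True, each root project ends up in its own
    # export_<project-id>_<content-hash>.zip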
@@ -320,8 +361,10 @@ class PrePopulate:
                 meta_user_id = metadata.get("user_id", "")
                 meta_user_name = metadata.get("user_name", "")
                 user_id, user_name = meta_user_id, meta_user_name
-        except Exception:
-            pass
+        except Exception as ex:
+            _print(
+                f"Error getting metadata from {cls.metadata_filename}: {str(ex)}"
+            )
 
         # Make sure we won't end up with an invalid company ID
         if company_id is None:
@@ -347,7 +390,7 @@ class PrePopulate:
         if artifacts_path and os.path.isdir(artifacts_path):
             artifacts_file = Path(filename).with_suffix(cls.artifacts_ext)
             if artifacts_file.is_file():
-                print(f"Unzipping artifacts into {artifacts_path}")
+                _print(f"Unzipping artifacts into {artifacts_path}")
                 with ZipFile(artifacts_file) as zfile:
                     zfile.extractall(artifacts_path)
@@ -370,7 +413,7 @@ class PrePopulate:
         base_file_name, _, old_hash = file.stem.rpartition("_")
         new_hash = hash_.hexdigest()
         if old_hash == new_hash:
-            print(f"The file {filename} was not updated")
+            _print(f"The file {filename} was not updated")
             temp_file.unlink()
             return []
@@ -384,7 +427,7 @@ class PrePopulate:
             artifacts_file.replace(new_artifacts)
             upadated.append(str(new_artifacts))
 
-        print(f"File {str(file)} replaced with {str(new_file)}")
+        _print(f"File {str(file)} replaced with {str(new_file)}")
         file.unlink()
         return upadated
@@ -446,12 +489,12 @@ class PrePopulate:
         not_found = missing - set(resolved_by_name)
         if not_found:
-            print(f"ERROR: no match for {', '.join(not_found)}")
+            _print(f"ERROR: no match for {', '.join(not_found)}")
             exit(1)
 
         duplicates = [k for k, v in resolved_by_name.items() if len(v) > 1]
         if duplicates:
-            print(f"ERROR: more than one match for {', '.join(duplicates)}")
+            _print(f"ERROR: more than one match for {', '.join(duplicates)}")
             exit(1)
 
         def get_new_items(input_: Iterable) -> list:
@@ -489,20 +532,24 @@ class PrePopulate:
             return
 
         prefixes = [
-            cls.ParentPrefix(prefix=f"{project.name.rpartition('/')[0]}/", path=project.path)
+            cls.ParentPrefix(
+                prefix=f"{project.name.rpartition('/')[0]}/", path=project.path
+            )
             for project in orphans
         ]
         prefixes.sort(key=lambda p: len(p.path), reverse=True)
         for project in projects:
-            prefix = first(pref for pref in prefixes if project.path[:len(pref.path)] == pref.path)
+            prefix = first(
+                pref for pref in prefixes if project.path[: len(pref.path)] == pref.path
+            )
             if not prefix:
                 continue
-            project.path = project.path[len(prefix.path):]
+            project.path = project.path[len(prefix.path) :]
             if not project.path:
                 project.parent = None
             project.name = project.name.removeprefix(prefix.prefix)
-        # print(
+        # _print(
         #     f"ERROR: the following projects are exported without their parents: {orphans}"
         # )
         # exit(1)
@@ -518,16 +565,20 @@ class PrePopulate:
         entities: Dict[Any] = defaultdict(set)
 
         if projects:
-            print("Reading projects...")
-            projects = project_ids_with_children(projects)
-            entities[cls.project_cls].update(
-                cls._resolve_entity_type(cls.project_cls, projects)
-            )
-            print("--> Reading project experiments...")
+            _print("Reading projects...")
+            root = None in projects
+            projects = [p for p in projects if p]
+            if projects:
+                projects = project_ids_with_children(projects)
+                entities[cls.project_cls].update(
+                    cls._resolve_entity_type(cls.project_cls, projects)
+                )
+            _print("--> Reading project experiments...")
+            p_ids = list(set(p.id for p in entities[cls.project_cls]))
+            if root:
+                p_ids.append(None)
             query = Q(
-                project__in=list(
-                    set(filter(None, (p.id for p in entities[cls.project_cls])))
-                ),
+                project__in=p_ids,
                 system_tags__nin=[EntityVisibility.archived.value],
             )
             if task_statuses:
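
The `root = None in projects` handling exists because a `None` entry in the projects list stands for tasks that belong to no project at all: MongoDB's `$in` matches documents whose field is null (or missing) when the list contains `null`, so appending `None` to `p_ids` pulls root-level tasks into the same query. A hedged, standalone sketch of that semantics with illustrative names:

    from mongoengine import Q

    p_ids = ["4f2e6a...", "91abcd..."]  # resolved project ids (illustrative)
    include_root = True  # a None entry was present in the requested projects

    if include_root:
        p_ids.append(None)  # {$in: [..., null]} also matches project == null

    query = Q(project__in=p_ids)  # tasks in these projects, plus root-level tasks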
@@ -538,9 +589,11 @@ class PrePopulate:
             )
 
         if experiments:
-            print("Reading experiments...")
-            entities[cls.task_cls].update(cls._resolve_entity_type(cls.task_cls, experiments))
-            print("--> Reading experiments projects...")
+            _print("Reading experiments...")
+            entities[cls.task_cls].update(
+                cls._resolve_entity_type(cls.task_cls, experiments)
+            )
+            _print("--> Reading experiments projects...")
             objs = cls.project_cls.objects(
                 id__in=list(
                     set(filter(None, (p.project for p in entities[cls.task_cls])))
) )
model_ids = {tm.model for tm in task_models} model_ids = {tm.model for tm in task_models}
if model_ids: if model_ids:
print("Reading models...") _print("Reading models...")
entities[cls.model_cls] = set(cls.model_cls.objects(id__in=list(model_ids))) entities[cls.model_cls] = set(cls.model_cls.objects(id__in=list(model_ids)))
# noinspection PyTypeChecker # noinspection PyTypeChecker
@@ -625,22 +678,41 @@ class PrePopulate:
         except AttributeError:
             pass
 
+    @staticmethod
+    def _translate_url(url_: str, url_trans: UrlTranslation) -> str:
+        if not (url_ and url_trans):
+            return url_
+
+        source, target = url_trans
+        if not url_.startswith(source):
+            return url_
+        return target + url_[len(source):]
+
     @classmethod
     def _export_task_events(
-        cls, task: Task, base_filename: str, writer: ZipFile, hash_
+        cls,
+        task: Task,
+        base_filename: str,
+        writer: ZipFile,
+        hash_,
+        url_trans: UrlTranslation,
     ) -> Sequence[str]:
         artifacts = []
         filename = f"{base_filename}_{task.id}{cls.events_file_suffix}.json"
-        print(f"Writing task events into {writer.filename}:{filename}")
+        _print(f"Writing task events into {writer.filename}:{filename}")
         with BytesIO() as f:
             with cls.JsonLinesWriter(f) as w:
                 scroll_id = None
+                events_count = 0
                 while True:
                     res = cls.event_bll.get_task_events(
                         company_id=task.company,
                         task_id=task.id,
                         event_type=EventType.all,
                         scroll_id=scroll_id,
+                        size=10_000,
                     )
                     if not res.events:
                         break
@@ -650,16 +722,22 @@ class PrePopulate:
                     if event_type == EventType.metrics_image.value:
                         url = cls._get_fixed_url(event.get("url"))
                         if url:
-                            event["url"] = url
                             artifacts.append(url)
+                            event["url"] = cls._translate_url(url, url_trans)
                     elif event_type == EventType.metrics_plot.value:
                         plot_str: str = event.get("plot_str", "")
-                        for match in cls.img_source_regex.findall(plot_str):
-                            url = cls._get_fixed_url(match)
-                            if match != url:
-                                plot_str = plot_str.replace(match, url)
-                            artifacts.append(url)
+                        if plot_str:
+                            for match in cls.img_source_regex.findall(plot_str):
+                                url = cls._get_fixed_url(match)
+                                artifacts.append(url)
+                                new_url = cls._translate_url(url, url_trans)
+                                if match != new_url:
+                                    plot_str = plot_str.replace(match, new_url)
+                            event["plot_str"] = plot_str
                     w.write(json.dumps(event))
+                    events_count += 1
+                _print(f"Got {events_count} events for task {task.id}")
+            _print(f"Writing {events_count} events for task {task.id}")
         data = f.getvalue()
         hash_.update(data)
         writer.writestr(filename, data)
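
The plot branch rewrites every image source embedded in the plot JSON string, collecting the untranslated URL for artifact export and storing the translated one. `img_source_regex` is defined elsewhere in the class; the sketch below assumes a simplified stand-in pattern and made-up URLs, just to show the same find-fix-replace loop:

    import re

    # Assumed stand-in for cls.img_source_regex
    img_source_regex = re.compile(r'"source":\s*"([^"]+)"')

    plot_str = '{"source": "https://old-files.example.com/plots/p.png"}'
    source, target = ("https://old-files.example.com/", "https://new-files.example.com/")

    for match in img_source_regex.findall(plot_str):
        new_url = target + match[len(source):] if match.startswith(source) else match
        if match != new_url:
            plot_str = plot_str.replace(match, new_url)

    print(plot_str)
    # {"source": "https://new-files.example.com/plots/p.png"}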
@@ -677,53 +755,62 @@ class PrePopulate:
                 fixed.host += ".s3.amazonaws.com"
             return fixed.url
         except Exception as ex:
-            print(f"Failed processing link {url}. " + str(ex))
+            _print(f"Failed processing link {url}. " + str(ex))
             return url
 
     @classmethod
     def _export_entity_related_data(
-        cls, entity_cls, entity, base_filename: str, writer: ZipFile, hash_
+        cls,
+        entity_cls,
+        entity,
+        base_filename: str,
+        writer: ZipFile,
+        hash_,
+        url_trans: UrlTranslation,
     ):
         if entity_cls == cls.task_cls:
             return [
-                *cls._get_task_output_artifacts(entity),
-                *cls._export_task_events(entity, base_filename, writer, hash_),
+                *cls._get_task_output_artifacts(entity, url_trans),
+                *cls._export_task_events(
+                    entity, base_filename, writer, hash_, url_trans
+                ),
             ]
 
         if entity_cls == cls.model_cls:
-            entity.uri = cls._get_fixed_url(entity.uri)
-            return [entity.uri] if entity.uri else []
+            url = cls._get_fixed_url(entity.uri)
+            entity.uri = cls._translate_url(url, url_trans)
+            return [url] if url else []
 
         return []
 
     @classmethod
-    def _get_task_output_artifacts(cls, task: Task) -> Sequence[str]:
+    def _get_task_output_artifacts(
+        cls, task: Task, url_trans: UrlTranslation
+    ) -> Sequence[str]:
         if not task.execution.artifacts:
             return []
 
+        artifact_urls = []
         for a in task.execution.artifacts.values():
             if a.mode == ArtifactModes.output:
-                a.uri = cls._get_fixed_url(a.uri)
+                url = cls._get_fixed_url(a.uri)
+                a.uri = cls._translate_url(url, url_trans)
+                if url and a.mode == ArtifactModes.output:
+                    artifact_urls.append(url)
 
-        return [
-            a.uri
-            for a in task.execution.artifacts.values()
-            if a.mode == ArtifactModes.output and a.uri
-        ]
+        return artifact_urls
 
     @classmethod
     def _export_artifacts(
         cls, writer: ZipFile, artifacts: Sequence[str], artifacts_path: str
     ):
         unique_paths = set(unquote(str(furl(artifact).path)) for artifact in artifacts)
-        print(f"Writing {len(unique_paths)} artifacts into {writer.filename}")
+        _print(f"Writing {len(unique_paths)} artifacts into {writer.filename}")
         for path in unique_paths:
             path = path.lstrip("/")
             full_path = os.path.join(artifacts_path, path)
             if os.path.isfile(full_path):
                 writer.write(full_path, path)
             else:
-                print(f"Artifact {full_path} not found")
+                _print(f"Artifact {full_path} not found")
 
     @classmethod
     def _export_users(cls, writer: ZipFile):
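
Note the deliberate asymmetry in the rewritten helpers above: the untranslated URL goes into the returned artifacts list, so `_export_artifacts` can still resolve the file under the local fileserver root by its path, while the translated URL is what gets persisted on the exported entity. A standalone sketch of the URL-to-path mapping, using the same `furl`/`unquote` calls with a made-up URL and mount point:

    import os
    from urllib.parse import unquote
    from furl import furl

    artifact = "http://files.clear.ml/proj%20x/artifacts/model.pkl"
    artifacts_path = "/opt/fileserver"  # illustrative local mount

    path = unquote(str(furl(artifact).path)).lstrip("/")
    full_path = os.path.join(artifacts_path, path)
    print(full_path)
    # /opt/fileserver/proj x/artifacts/model.pkl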
@@ -742,7 +829,7 @@ class PrePopulate:
             return
 
         auth_users = {uid: data for uid, data in auth_users.items() if uid in be_users}
-        print(f"Writing {len(auth_users)} users into {writer.filename}")
+        _print(f"Writing {len(auth_users)} users into {writer.filename}")
         data = {}
         for field, users in (("auth", auth_users), ("backend", be_users)):
             with BytesIO() as f:
@@ -773,6 +860,7 @@ class PrePopulate:
         tag_entities: bool = False,
         export_events: bool = True,
         cleanup_users: bool = True,
+        url_trans: UrlTranslation = None,
     ) -> Sequence[str]:
         """
         Export the requested experiments, projects and models and return the list of artifact files
@@ -780,7 +868,7 @@ class PrePopulate:
         The projects should be sorted by name so that on import the hierarchy is correctly restored from top to bottom
         """
         artifacts = []
-        now = datetime.utcnow()
+        now = datetime.now(timezone.utc)
         for cls_ in sorted(entities, key=attrgetter("__name__")):
             items = sorted(entities[cls_], key=attrgetter("name", "id"))
             if not items:
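
The `datetime.utcnow()` to `datetime.now(timezone.utc)` switch (repeated in the import path further down) trades the naive-UTC constructor, deprecated since Python 3.12, for a timezone-aware one:

    from datetime import datetime, timezone

    naive = datetime.utcnow()           # tzinfo is None; deprecated since 3.12
    aware = datetime.now(timezone.utc)  # tzinfo is timezone.utc

    print(naive.tzinfo)  # None
    print(aware.tzinfo)  # UTC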
@@ -790,11 +878,11 @@ class PrePopulate:
             for item in items:
                 artifacts.extend(
                     cls._export_entity_related_data(
-                        cls_, item, base_filename, writer, hash_
+                        cls_, item, base_filename, writer, hash_, url_trans
                     )
                 )
             filename = base_filename + ".json"
-            print(f"Writing {len(items)} items into {writer.filename}:{filename}")
+            _print(f"Writing {len(items)} items into {writer.filename}:{filename}")
             with BytesIO() as f:
                 with cls.JsonLinesWriter(f) as w:
                     for item in items:
@@ -968,7 +1056,7 @@ class PrePopulate:
         for entity_file in entity_files:
             with reader.open(entity_file) as f:
                 full_name = splitext(entity_file.orig_filename)[0]
-                print(f"Reading {reader.filename}:{full_name}...")
+                _print(f"Reading {reader.filename}:{full_name}...")
                 res = cls._import_entity(
                     f,
                     full_name=full_name,
@@ -996,7 +1084,7 @@ class PrePopulate:
                 continue
             with reader.open(events_file) as f:
                 full_name = splitext(events_file.orig_filename)[0]
-                print(f"Reading {reader.filename}:{full_name}...")
+                _print(f"Reading {reader.filename}:{full_name}...")
                 cls._import_events(f, company_id, task.user, task.id)
 
     @classmethod
@@ -1082,14 +1170,16 @@ class PrePopulate:
         )
 
         models = task_data.get("models", {})
-        now = datetime.utcnow()
+        now = datetime.now(timezone.utc)
         for old_field, type_ in (
             ("execution.model", TaskModelTypes.input),
             ("output.model", TaskModelTypes.output),
         ):
             old_path = old_field.split(".")
             old_model = nested_get(task_data, old_path)
-            new_models = [m for m in models.get(type_, []) if m.get("model") is not None]
+            new_models = [
+                m for m in models.get(type_, []) if m.get("model") is not None
+            ]
             name = TaskModelNames[type_]
             if old_model and not any(
                 m
@@ -1127,7 +1217,7 @@ class PrePopulate:
     ) -> Optional[Sequence[Task]]:
         user_mapping = user_mapping or {}
         cls_ = cls._get_entity_type(full_name)
-        print(f"Writing {cls_.__name__.lower()}s into database")
+        _print(f"Writing {cls_.__name__.lower()}s into database")
         tasks = []
         override_project_count = 0
         data_upgrade_funcs: Mapping[Type, Callable] = {
@@ -1164,21 +1254,23 @@ class PrePopulate:
             doc.logo_blob = metadata.get("logo_blob", None)
 
         cls_.objects(company=company_id, name=doc.name, id__ne=doc.id).update(
-            set__name=f"{doc.name}_{datetime.utcnow().strftime('%Y-%m-%d_%H-%M-%S')}"
+            set__name=f"{doc.name}_{datetime.now(timezone.utc).strftime('%Y-%m-%d_%H-%M-%S')}"
         )
         doc.save()
         if isinstance(doc, cls.task_cls):
             tasks.append(doc)
-            cls.event_bll.delete_task_events(company_id, doc.id, wait_for_delete=True)
+            cls.event_bll.delete_task_events(
+                company_id, doc.id, wait_for_delete=True
+            )
 
         if tasks:
             return tasks
 
     @classmethod
     def _import_events(cls, f: IO[bytes], company_id: str, user_id: str, task_id: str):
-        print(f"Writing events for task {task_id} into database")
+        _print(f"Writing events for task {task_id} into database")
         for events_chunk in chunked_iter(cls.json_lines(f), 1000):
             events = [json.loads(item) for item in events_chunk]
             for ev in events:

View File

@@ -32,8 +32,8 @@ class TestTasksArtifacts(TestService):
         # test edit
         artifacts = [
-            dict(key="bb", type="str", uri="test1", mode="output"),
-            dict(key="aa", type="int", uri="test2", mode="input"),
+            dict(key="bb", type="str", uri="http://files.clear.ml/test1", mode="output"),
+            dict(key="aa", type="int", uri="http://files.clear.ml/test2", mode="input"),
         ]
         self.api.tasks.edit(task=task, execution={"artifacts": artifacts})
         res = self.api.tasks.get_by_id(task=task).task

View File

@@ -14,6 +14,12 @@ class TestTaskPlots(TestService):
     @staticmethod
     def _create_task_event(task, iteration, **kwargs):
+        plot_str = kwargs.get("plot_str")
+        if plot_str:
+            if not plot_str.startswith("http"):
+                plot_str = "http://files.clear.ml/" + plot_str
+            kwargs["plot_str"] = '{"source": "' + plot_str + '"}'
         return {
             "worker": "test",
             "type": "plot",