mirror of
https://github.com/clearml/clearml
synced 2025-04-23 07:45:24 +00:00
Dataset lineage view is broken with multiple dataset dependencies
This commit is contained in:
parent
3dee5854bd
commit
6529c0dc11
@ -2747,22 +2747,48 @@ class Dataset(object):
|
|||||||
current_index = 0
|
current_index = 0
|
||||||
dataset_struct = {}
|
dataset_struct = {}
|
||||||
indices = {}
|
indices = {}
|
||||||
dependency_graph_ex = deepcopy(self._dependency_graph)
|
dependency_graph_ex_copy = deepcopy(self._dependency_graph)
|
||||||
|
# Make sure that if we reference a node as a parent, it exists on the DAG itself
|
||||||
for parents in self._dependency_graph.values():
|
for parents in self._dependency_graph.values():
|
||||||
for parent in parents:
|
for parent in parents:
|
||||||
if parent not in self._dependency_graph:
|
if parent not in self._dependency_graph:
|
||||||
dependency_graph_ex[parent] = []
|
dependency_graph_ex_copy[parent] = []
|
||||||
for id_, parents in dependency_graph_ex.items():
|
# get data from the parent versions
|
||||||
|
dependency_graph_ex = {}
|
||||||
|
while dependency_graph_ex_copy:
|
||||||
|
id_, parents = dependency_graph_ex_copy.popitem()
|
||||||
|
dependency_graph_ex[id_] = parents
|
||||||
|
|
||||||
task = Task.get_task(task_id=id_)
|
task = Task.get_task(task_id=id_)
|
||||||
dataset_struct_entry = {"job_id": id_, "status": task.status}
|
dataset_struct_entry = {"job_id": id_, "status": task.status}
|
||||||
# noinspection PyProtectedMember
|
# noinspection PyProtectedMember
|
||||||
last_update = task._get_last_update()
|
last_update = task._get_last_update()
|
||||||
if last_update:
|
if last_update:
|
||||||
last_update = calendar.timegm(last_update.timetuple())
|
last_update = calendar.timegm(last_update.timetuple())
|
||||||
|
# fetch the parents of this version (task) based on what we have on the Task itself.
|
||||||
|
# noinspection PyBroadException
|
||||||
|
try:
|
||||||
|
dataset_version_node = task.get_configuration_object_as_dict("Dataset Struct")
|
||||||
|
# find the node that is us
|
||||||
|
for node in dataset_version_node.values():
|
||||||
|
if node["job_id"] != id_:
|
||||||
|
continue
|
||||||
|
for parent in node.get("parents", []):
|
||||||
|
parent_id = dataset_version_node[parent]["job_id"]
|
||||||
|
if parent_id not in dependency_graph_ex_copy and parent_id not in dependency_graph_ex:
|
||||||
|
# queue the parent in dependency_graph_ex_copy so it is processed by the loop as well
|
||||||
|
dependency_graph_ex_copy[parent_id] = []
|
||||||
|
if parent_id not in parents:
|
||||||
|
parents.append(parent_id)
|
||||||
|
break
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
dataset_struct_entry["last_update"] = last_update
|
dataset_struct_entry["last_update"] = last_update
|
||||||
dataset_struct_entry["parents"] = parents
|
dataset_struct_entry["parents"] = parents
|
||||||
|
# noinspection PyProtectedMember
|
||||||
dataset_struct_entry["job_size"] = task._get_runtime_properties().get("ds_total_size")
|
dataset_struct_entry["job_size"] = task._get_runtime_properties().get("ds_total_size")
|
||||||
dataset_struct_entry["name"] = task.name
|
dataset_struct_entry["name"] = task.name
|
||||||
|
# noinspection PyProtectedMember
|
||||||
dataset_struct_entry["version"] = task._get_runtime_properties().get("version")
|
dataset_struct_entry["version"] = task._get_runtime_properties().get("version")
|
||||||
dataset_struct[str(current_index)] = dataset_struct_entry
|
dataset_struct[str(current_index)] = dataset_struct_entry
|
||||||
indices[id_] = str(current_index)
|
indices[id_] = str(current_index)
|
||||||
|
Loading…
Reference in New Issue
Block a user