From bb648bdb027c5314afcd5594d88bd50673ec2b46 Mon Sep 17 00:00:00 2001
From: allegroai <>
Date: Sun, 10 Jan 2021 13:06:50 +0200
Subject: [PATCH] Fix dataset genealogy, graph and restoring data
---
clearml/datasets/dataset.py | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/clearml/datasets/dataset.py b/clearml/datasets/dataset.py
index f5641e72..8a8dbfcb 100644
--- a/clearml/datasets/dataset.py
+++ b/clearml/datasets/dataset.py
@@ -1092,7 +1092,8 @@ class Dataset(object):
# noinspection DuplicatedCode
while roots:
r = roots.pop(0)
- dependencies.append(r)
+ if r not in dependencies:
+ dependencies.append(r)
# add the parents of the current node, only if the parents are in the general graph node list
if include_unused and r not in self._dependency_graph:
roots.extend(list(reversed(
@@ -1109,7 +1110,8 @@ class Dataset(object):
# noinspection DuplicatedCode
while roots:
r = roots.pop(0)
- dependencies.append(r)
+ if r not in dependencies:
+ dependencies.append(r)
# add the parents of the current node, only if the parents are in the general graph node list
if include_unused and r not in self._dependency_graph:
roots.extend(list(reversed(
@@ -1222,19 +1224,22 @@ class Dataset(object):
# create DAG
visited = []
+ # add nodes
for idx, node in enumerate(nodes):
visited.append(node)
- if node in self._dependency_graph:
- parents = [visited.index(p) for p in self._dependency_graph[node] or [] if p in visited]
- else:
- parents = [visited.index(p) for p in self.get(dataset_id=node)._get_parents() or [] if p in visited]
-
sankey_node['color'].append("mediumpurple" if node == self.id else "lightblue")
sankey_node['label'].append('{}'.format(node))
sankey_node['customdata'].append(
"name {}
removed {}
modified {}
added {}
size {}".format(
node_names.get(node, ''), *node_details[node]))
+ # add edges
+ for idx, node in enumerate(nodes):
+ if node in self._dependency_graph:
+ parents = [visited.index(p) for p in self._dependency_graph[node] or [] if p in visited]
+ else:
+ parents = [visited.index(p) for p in self.get(dataset_id=node)._get_parents() or [] if p in visited]
+
for p in parents:
sankey_link['source'].append(p)
sankey_link['target'].append(idx)