mirror of
				https://github.com/clearml/clearml
				synced 2025-06-26 18:16:07 +00:00 
			
		
		
		
	Dataset lineage view is broken with multiple dataset dependencies
This commit is contained in:
		
							parent
							
								
									3dee5854bd
								
							
						
					
					
						commit
						6529c0dc11
					
				| @ -2747,22 +2747,48 @@ class Dataset(object): | |||||||
|         current_index = 0 |         current_index = 0 | ||||||
|         dataset_struct = {} |         dataset_struct = {} | ||||||
|         indices = {} |         indices = {} | ||||||
|         dependency_graph_ex = deepcopy(self._dependency_graph) |         dependency_graph_ex_copy = deepcopy(self._dependency_graph) | ||||||
|  |         # Make sure that if we reference a node as a parent, it exists on the DAG itself | ||||||
|         for parents in self._dependency_graph.values(): |         for parents in self._dependency_graph.values(): | ||||||
|             for parent in parents: |             for parent in parents: | ||||||
|                 if parent not in self._dependency_graph: |                 if parent not in self._dependency_graph: | ||||||
|                     dependency_graph_ex[parent] = [] |                     dependency_graph_ex_copy[parent] = [] | ||||||
|         for id_, parents in dependency_graph_ex.items(): |         # get data from the parent versions | ||||||
|  |         dependency_graph_ex = {} | ||||||
|  |         while dependency_graph_ex_copy: | ||||||
|  |             id_, parents = dependency_graph_ex_copy.popitem() | ||||||
|  |             dependency_graph_ex[id_] = parents | ||||||
|  | 
 | ||||||
|             task = Task.get_task(task_id=id_) |             task = Task.get_task(task_id=id_) | ||||||
|             dataset_struct_entry = {"job_id": id_, "status": task.status} |             dataset_struct_entry = {"job_id": id_, "status": task.status} | ||||||
|             # noinspection PyProtectedMember |             # noinspection PyProtectedMember | ||||||
|             last_update = task._get_last_update() |             last_update = task._get_last_update() | ||||||
|             if last_update: |             if last_update: | ||||||
|                 last_update = calendar.timegm(last_update.timetuple()) |                 last_update = calendar.timegm(last_update.timetuple()) | ||||||
|  |             # fetch the parents of this version (task) based on what we have on the Task itself. | ||||||
|  |             # noinspection PyBroadException | ||||||
|  |             try: | ||||||
|  |                 dataset_version_node = task.get_configuration_object_as_dict("Dataset Struct") | ||||||
|  |                 # find the node that is us | ||||||
|  |                 for node in dataset_version_node.values(): | ||||||
|  |                     if node["job_id"] != id_: | ||||||
|  |                         continue | ||||||
|  |                     for parent in node.get("parents", []): | ||||||
|  |                         parent_id = dataset_version_node[parent]["job_id"] | ||||||
|  |                         if parent_id not in dependency_graph_ex_copy and parent_id not in dependency_graph_ex: | ||||||
|  |                             # queue the parent in dependency_graph_ex_copy so it gets processed too | ||||||
|  |                             dependency_graph_ex_copy[parent_id] = [] | ||||||
|  |                         if parent_id not in parents: | ||||||
|  |                             parents.append(parent_id) | ||||||
|  |                     break | ||||||
|  |             except Exception: | ||||||
|  |                 pass | ||||||
|             dataset_struct_entry["last_update"] = last_update |             dataset_struct_entry["last_update"] = last_update | ||||||
|             dataset_struct_entry["parents"] = parents |             dataset_struct_entry["parents"] = parents | ||||||
|  |             # noinspection PyProtectedMember | ||||||
|             dataset_struct_entry["job_size"] = task._get_runtime_properties().get("ds_total_size") |             dataset_struct_entry["job_size"] = task._get_runtime_properties().get("ds_total_size") | ||||||
|             dataset_struct_entry["name"] = task.name |             dataset_struct_entry["name"] = task.name | ||||||
|  |             # noinspection PyProtectedMember | ||||||
|             dataset_struct_entry["version"] = task._get_runtime_properties().get("version") |             dataset_struct_entry["version"] = task._get_runtime_properties().get("version") | ||||||
|             dataset_struct[str(current_index)] = dataset_struct_entry |             dataset_struct[str(current_index)] = dataset_struct_entry | ||||||
|             indices[id_] = str(current_index) |             indices[id_] = str(current_index) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user
	 allegroai
						allegroai