mirror of
				https://github.com/clearml/clearml-server
				synced 2025-06-26 23:15:47 +00:00 
			
		
		
		
	Add support for new IDs generation when importing projects
This commit is contained in:
		
							parent
							
								
									d760cf5835
								
							
						
					
					
						commit
						fe29743c54
					
				@ -24,6 +24,7 @@ from typing import (
 | 
			
		||||
    Callable,
 | 
			
		||||
)
 | 
			
		||||
from urllib.parse import unquote, urlparse
 | 
			
		||||
from uuid import uuid4
 | 
			
		||||
from zipfile import ZipFile, ZIP_BZIP2
 | 
			
		||||
 | 
			
		||||
import mongoengine
 | 
			
		||||
@ -690,6 +691,19 @@ class PrePopulate:
 | 
			
		||||
                continue
 | 
			
		||||
            yield clean
 | 
			
		||||
 | 
			
		||||
    @classmethod
    def _generate_new_ids(
        cls, reader: ZipFile, entity_files: Sequence
    ) -> Mapping[str, str]:
        """
        Build a mapping from each entity's original ``_id`` to a freshly
        generated 32-char hex id, by scanning every entity file in the archive.

        :param reader: open zip archive containing the exported entities
        :param entity_files: zip entries holding JSON-lines entity dumps
        :return: dict of {original id -> new id}
        """
        new_ids = {}
        for entry in entity_files:
            with reader.open(entry) as stream:
                for line in cls.json_lines(stream):
                    original = json.loads(line).get("_id")
                    if not original:
                        continue
                    # uuid4().hex is the canonical dash-free hex form
                    new_ids[original] = uuid4().hex
        return new_ids
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def _import(
 | 
			
		||||
        cls,
 | 
			
		||||
@ -704,37 +718,46 @@ class PrePopulate:
 | 
			
		||||
        Start from entities since event import will require the tasks already in DB
 | 
			
		||||
        """
 | 
			
		||||
        event_file_ending = cls.events_file_suffix + ".json"
 | 
			
		||||
        entity_files = (
 | 
			
		||||
        entity_files = [
 | 
			
		||||
            fi
 | 
			
		||||
            for fi in reader.filelist
 | 
			
		||||
            if not fi.orig_filename.endswith(event_file_ending)
 | 
			
		||||
            and fi.orig_filename != cls.metadata_filename
 | 
			
		||||
        )
 | 
			
		||||
        ]
 | 
			
		||||
        metadata = metadata or {}
 | 
			
		||||
        old_to_new_ids = (
 | 
			
		||||
            cls._generate_new_ids(reader, entity_files)
 | 
			
		||||
            if metadata.get("new_ids")
 | 
			
		||||
            else {}
 | 
			
		||||
        )
 | 
			
		||||
        tasks = []
 | 
			
		||||
        for entity_file in entity_files:
 | 
			
		||||
            with reader.open(entity_file) as f:
 | 
			
		||||
                full_name = splitext(entity_file.orig_filename)[0]
 | 
			
		||||
                print(f"Reading {reader.filename}:{full_name}...")
 | 
			
		||||
                res = cls._import_entity(f, full_name, company_id, user_id, metadata)
 | 
			
		||||
                res = cls._import_entity(
 | 
			
		||||
                    f, full_name, company_id, user_id, metadata, old_to_new_ids
 | 
			
		||||
                )
 | 
			
		||||
                if res:
 | 
			
		||||
                    tasks = res
 | 
			
		||||
 | 
			
		||||
        if sort_tasks_by_last_updated:
 | 
			
		||||
            tasks = sorted(tasks, key=attrgetter("last_update"))
 | 
			
		||||
 | 
			
		||||
        new_to_old_ids = {v: k for k, v in old_to_new_ids.items()}
 | 
			
		||||
        for task in tasks:
 | 
			
		||||
            old_task_id = new_to_old_ids.get(task.id, task.id)
 | 
			
		||||
            events_file = first(
 | 
			
		||||
                fi
 | 
			
		||||
                for fi in reader.filelist
 | 
			
		||||
                if fi.orig_filename.endswith(task.id + event_file_ending)
 | 
			
		||||
                if fi.orig_filename.endswith(old_task_id + event_file_ending)
 | 
			
		||||
            )
 | 
			
		||||
            if not events_file:
 | 
			
		||||
                continue
 | 
			
		||||
            with reader.open(events_file) as f:
 | 
			
		||||
                full_name = splitext(events_file.orig_filename)[0]
 | 
			
		||||
                print(f"Reading {reader.filename}:{full_name}...")
 | 
			
		||||
                cls._import_events(f, full_name, company_id, user_id)
 | 
			
		||||
                cls._import_events(f, company_id, user_id, task.id)
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def _get_entity_type(cls, full_name) -> Type[mongoengine.Document]:
 | 
			
		||||
@ -847,6 +870,7 @@ class PrePopulate:
 | 
			
		||||
        company_id: str,
 | 
			
		||||
        user_id: str,
 | 
			
		||||
        metadata: Mapping[str, Any],
 | 
			
		||||
        old_to_new_ids: Mapping[str, str] = None,
 | 
			
		||||
    ) -> Optional[Sequence[Task]]:
 | 
			
		||||
        cls_ = cls._get_entity_type(full_name)
 | 
			
		||||
        print(f"Writing {cls_.__name__.lower()}s into database")
 | 
			
		||||
@ -858,6 +882,11 @@ class PrePopulate:
 | 
			
		||||
            cls.project_cls: cls._upgrade_project_data,
 | 
			
		||||
        }
 | 
			
		||||
        for item in cls.json_lines(f):
 | 
			
		||||
            if old_to_new_ids:
 | 
			
		||||
                for old_id, new_id in old_to_new_ids.items():
 | 
			
		||||
                    # replace ids only when they are standalone strings
 | 
			
		||||
                    # otherwise artifacts uris that contain old ids may get damaged
 | 
			
		||||
                    item = item.replace(f'"{old_id}"', f'"{new_id}"')
 | 
			
		||||
            upgrade_func = data_upgrade_funcs.get(cls_)
 | 
			
		||||
            if upgrade_func:
 | 
			
		||||
                item = json.dumps(upgrade_func(json.loads(item)))
 | 
			
		||||
@ -894,11 +923,15 @@ class PrePopulate:
 | 
			
		||||
            return tasks
 | 
			
		||||
 | 
			
		||||
    @classmethod
    def _import_events(
        cls, f: IO[bytes], company_id: str, _, task_id: str
    ):
        """
        Read JSON-lines events from *f* and store them in the DB under the
        given task id, rewriting each event's task/company fields.

        :param f: binary stream of JSON-lines event records
        :param company_id: company to attribute the events to
        :param _: unused (kept for call-signature compatibility)
        :param task_id: task id stamped onto every imported event
        """
        print(f"Writing events for task {task_id} into database")
        # Insert in batches of 1000 to bound memory and request size
        for batch in chunked_iter(cls.json_lines(f), 1000):
            prepared = []
            for raw in batch:
                event = json.loads(raw)
                event["task"] = task_id
                event["company_id"] = company_id
                prepared.append(event)
            cls.event_bll.add_events(
                company_id, events=prepared, worker="", allow_locked_tasks=True
            )
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user