from collections import OrderedDict from datetime import datetime from typing import Collection, Sequence, Tuple, Any, Optional, Dict import dpath import six from mongoengine import Q from six import string_types import apiserver.database.utils as dbutils from apiserver.apierrors import errors from apiserver.bll.organization import OrgBLL, Tags from apiserver.config_repo import config from apiserver.database.errors import translate_errors_context from apiserver.database.model.model import Model from apiserver.database.model.project import Project from apiserver.database.model.task.metrics import EventStats, MetricEventStats from apiserver.database.model.task.output import Output from apiserver.database.model.task.task import ( Task, TaskStatus, TaskStatusMessage, TaskSystemTags, ArtifactModes, external_task_types, ) from apiserver.database.utils import get_company_or_none_constraint, id as create_id from apiserver.es_factory import es_factory from apiserver.service_repo import APICall from apiserver.timing_context import TimingContext from apiserver.utilities.parameter_key_escaper import ParameterKeyEscaper from .artifacts import artifacts_prepare_for_save from .param_utils import params_prepare_for_save from .utils import ChangeStatusRequest, validate_status_change log = config.logger(__file__) org_bll = OrgBLL() class TaskBLL(object): def __init__(self, events_es=None): self.events_es = ( events_es if events_es is not None else es_factory.connect("events") ) @classmethod def get_types(cls, company, project_ids: Optional[Sequence]) -> set: """ Return the list of unique task types used by company and public tasks If project ids passed then only tasks from these projects are considered """ query = get_company_or_none_constraint(company) if project_ids: query &= Q(project__in=project_ids) res = Task.objects(query).distinct(field="type") return set(res).intersection(external_task_types) @staticmethod def get_task_with_access( task_id, company_id, only=None, allow_public=False, requires_write_access=False ) -> Task: """ Gets a task that has a required write access :except errors.bad_request.InvalidTaskId: if the task is not found :except errors.forbidden.NoWritePermission: if write_access was required and the task cannot be modified """ with translate_errors_context(): query = dict(id=task_id, company=company_id) with TimingContext("mongo", "task_with_access"): if requires_write_access: task = Task.get_for_writing(_only=only, **query) else: task = Task.get(_only=only, **query, include_public=allow_public) if not task: raise errors.bad_request.InvalidTaskId(**query) return task @staticmethod def get_by_id( company_id, task_id, required_status=None, only_fields=None, allow_public=False, ): if only_fields: if isinstance(only_fields, string_types): only_fields = [only_fields] else: only_fields = list(only_fields) only_fields = only_fields + ["status"] with TimingContext("mongo", "task_by_id_all"): tasks = Task.get_many( company=company_id, query=Q(id=task_id), allow_public=allow_public, override_projection=only_fields, return_dicts=False, ) task = None if not tasks else tasks[0] if not task: raise errors.bad_request.InvalidTaskId(id=task_id) if required_status and not task.status == required_status: raise errors.bad_request.InvalidTaskStatus(expected=required_status) return task @staticmethod def assert_exists( company_id, task_ids, only=None, allow_public=False, return_tasks=True ) -> Optional[Sequence[Task]]: task_ids = [task_ids] if isinstance(task_ids, six.string_types) else task_ids with translate_errors_context(), TimingContext("mongo", "task_exists"): ids = set(task_ids) q = Task.get_many( company=company_id, query=Q(id__in=ids), allow_public=allow_public, return_dicts=False, ) if only: # Make sure to reset fields filters (some fields are excluded by default) since this # is an internal call and specific fields were requested. q = q.all_fields().only(*only) if q.count() != len(ids): raise errors.bad_request.InvalidTaskId(ids=task_ids) if return_tasks: return list(q) @staticmethod def create(call: APICall, fields: dict): identity = call.identity now = datetime.utcnow() return Task( id=create_id(), user=identity.user, company=identity.company, created=now, last_update=now, **fields, ) @staticmethod def validate_execution_model(task, allow_only_public=False): if not task.execution or not task.execution.model: return company = None if allow_only_public else task.company model_id = task.execution.model model = Model.objects( Q(id=model_id) & get_company_or_none_constraint(company) ).first() if not model: raise errors.bad_request.InvalidModelId(model=model_id) return model @classmethod def clone_task( cls, company_id, user_id, task_id, name: Optional[str] = None, comment: Optional[str] = None, parent: Optional[str] = None, project: Optional[str] = None, tags: Optional[Sequence[str]] = None, system_tags: Optional[Sequence[str]] = None, hyperparams: Optional[dict] = None, configuration: Optional[dict] = None, execution_overrides: Optional[dict] = None, validate_references: bool = False, ) -> Task: params_dict = { field: value for field, value in ( ("hyperparams", hyperparams), ("configuration", configuration), ) if value is not None } task = cls.get_by_id(company_id=company_id, task_id=task_id, allow_public=True) execution_dict = task.execution.to_proper_dict() if task.execution else {} execution_model_overriden = False if execution_overrides: execution_model_overriden = execution_overrides.get("model") is not None artifacts_prepare_for_save({"execution": execution_overrides}) params_dict["execution"] = {} for legacy_param in ("parameters", "configuration"): legacy_value = execution_overrides.pop(legacy_param, None) if legacy_value is not None: params_dict["execution"] = legacy_value execution_dict.update(execution_overrides) params_prepare_for_save(params_dict, previous_task=task) artifacts = execution_dict.get("artifacts") if artifacts: execution_dict["artifacts"] = { k: a for k, a in artifacts.items() if a.get("mode") != ArtifactModes.output } now = datetime.utcnow() with translate_errors_context(): new_task = Task( id=create_id(), user=user_id, company=company_id, created=now, last_update=now, name=name or task.name, comment=comment or task.comment, parent=parent or task.parent, project=project or task.project, tags=tags or task.tags, system_tags=system_tags or [], type=task.type, script=task.script, output=Output(destination=task.output.destination) if task.output else None, execution=execution_dict, configuration=params_dict.get("configuration") or task.configuration, hyperparams=params_dict.get("hyperparams") or task.hyperparams, ) cls.validate( new_task, validate_model=validate_references or execution_model_overriden, validate_parent=validate_references or parent, validate_project=validate_references or project, ) new_task.save() if task.project == new_task.project: updated_tags = tags updated_system_tags = system_tags else: updated_tags = new_task.tags updated_system_tags = new_task.system_tags org_bll.update_tags( company_id, Tags.Task, project=new_task.project, tags=updated_tags, system_tags=updated_system_tags, ) return new_task @classmethod def validate( cls, task: Task, validate_model=True, validate_parent=True, validate_project=True, ): if ( validate_parent and task.parent and not Task.get( company=task.company, id=task.parent, _only=("id",), include_public=True ) ): raise errors.bad_request.InvalidTaskId("invalid parent", parent=task.parent) if ( validate_project and task.project and not Project.get_for_writing(company=task.company, id=task.project) ): raise errors.bad_request.InvalidProjectId(id=task.project) if validate_model: cls.validate_execution_model(task) @staticmethod def get_unique_metric_variants(company_id, project_ids=None): pipeline = [ { "$match": dict( company=company_id, **({"project": {"$in": project_ids}} if project_ids else {}), ) }, {"$project": {"metrics": {"$objectToArray": "$last_metrics"}}}, {"$unwind": "$metrics"}, { "$project": { "metric": "$metrics.k", "variants": {"$objectToArray": "$metrics.v"}, } }, {"$unwind": "$variants"}, { "$group": { "_id": { "metric": "$variants.v.metric", "variant": "$variants.v.variant", }, "metrics": { "$addToSet": { "metric": "$variants.v.metric", "metric_hash": "$metric", "variant": "$variants.v.variant", "variant_hash": "$variants.k", } }, } }, {"$sort": OrderedDict({"_id.metric": 1, "_id.variant": 1})}, ] with translate_errors_context(): result = Task.aggregate(pipeline) return [r["metrics"][0] for r in result] @staticmethod def set_last_update( task_ids: Collection[str], company_id: str, last_update: datetime ): return Task.objects(id__in=task_ids, company=company_id).update( upsert=False, last_update=last_update ) @staticmethod def update_statistics( task_id: str, company_id: str, last_update: datetime = None, last_iteration: int = None, last_iteration_max: int = None, last_scalar_values: Sequence[Tuple[Tuple[str, ...], Any]] = None, last_events: Dict[str, Dict[str, dict]] = None, **extra_updates, ): """ Update task statistics :param task_id: Task's ID. :param company_id: Task's company ID. :param last_update: Last update time. If not provided, defaults to datetime.utcnow(). :param last_iteration: Last reported iteration. Use this to set a value regardless of current task's last iteration value. :param last_iteration_max: Last reported iteration. Use this to conditionally set a value only if the current task's last iteration value is smaller than the provided value. :param last_scalar_values: Last reported metrics summary for scalar events (value, metric, variant). :param last_events: Last reported metrics summary (value, metric, event type). :param extra_updates: Extra task updates to include in this update call. :return: """ last_update = last_update or datetime.utcnow() if last_iteration is not None: extra_updates.update(last_iteration=last_iteration) elif last_iteration_max is not None: extra_updates.update(max__last_iteration=last_iteration_max) if last_scalar_values is not None: def op_path(op, *path): return "__".join((op, "last_metrics") + path) for path, value in last_scalar_values: if path[-1] == "min_value": extra_updates[op_path("min", *path[:-1], "min_value")] = value elif path[-1] == "max_value": extra_updates[op_path("max", *path[:-1], "max_value")] = value else: extra_updates[op_path("set", *path)] = value if last_events is not None: def events_per_type(metric_data: Dict[str, dict]) -> Dict[str, EventStats]: return { event_type: EventStats(last_update=event["timestamp"]) for event_type, event in metric_data.items() } metric_stats = { dbutils.hash_field_name(metric_key): MetricEventStats( metric=metric_key, event_stats_by_type=events_per_type(metric_data) ) for metric_key, metric_data in last_events.items() } extra_updates["metric_stats"] = metric_stats Task.objects(id=task_id, company=company_id).update( upsert=False, last_update=last_update, **extra_updates ) @classmethod def model_set_ready( cls, model_id: str, company_id: str, publish_task: bool, force_publish_task: bool = False, ) -> tuple: with translate_errors_context(): query = dict(id=model_id, company=company_id) model = Model.objects(**query).first() if not model: raise errors.bad_request.InvalidModelId(**query) elif model.ready: raise errors.bad_request.ModelIsReady(**query) published_task_data = {} if model.task and publish_task: task = ( Task.objects(id=model.task, company=company_id) .only("id", "status") .first() ) if task and task.status != TaskStatus.published: published_task_data["data"] = cls.publish_task( task_id=model.task, company_id=company_id, publish_model=False, force=force_publish_task, ) published_task_data["id"] = model.task updated = model.update(upsert=False, ready=True) return updated, published_task_data @classmethod def publish_task( cls, task_id: str, company_id: str, publish_model: bool, force: bool, status_reason: str = "", status_message: str = "", ) -> dict: task = cls.get_task_with_access( task_id, company_id=company_id, requires_write_access=True ) if not force: validate_status_change(task.status, TaskStatus.published) previous_task_status = task.status output = task.output or Output() publish_failed = False try: # set state to publishing task.status = TaskStatus.publishing task.save() # publish task models if task.output.model and publish_model: output_model = ( Model.objects(id=task.output.model) .only("id", "task", "ready") .first() ) if output_model and not output_model.ready: cls.model_set_ready( model_id=task.output.model, company_id=company_id, publish_task=False, ) # set task status to published, and update (or set) it's new output (view and models) return ChangeStatusRequest( task=task, new_status=TaskStatus.published, force=force, status_reason=status_reason, status_message=status_message, ).execute(published=datetime.utcnow(), output=output) except Exception as ex: publish_failed = True raise ex finally: if publish_failed: task.status = previous_task_status task.save() @classmethod def stop_task( cls, task_id: str, company_id: str, user_name: str, status_reason: str, force: bool, ) -> dict: """ Stop a running task. Requires task status 'in_progress' and execution_progress 'running', or force=True. Development task or task that has no associated worker is stopped immediately. For a non-development task with worker only the status message is set to 'stopping' to allow the worker to stop the task and report by itself :return: updated task fields """ task = cls.get_task_with_access( task_id, company_id=company_id, only=( "status", "project", "tags", "system_tags", "last_worker", "last_update", ), requires_write_access=True, ) def is_run_by_worker(t: Task) -> bool: """Checks if there is an active worker running the task""" update_timeout = config.get("apiserver.workers.task_update_timeout", 600) return ( t.last_worker and t.last_update and (datetime.utcnow() - t.last_update).total_seconds() < update_timeout ) if TaskSystemTags.development in task.system_tags or not is_run_by_worker(task): new_status = TaskStatus.stopped status_message = f"Stopped by {user_name}" else: new_status = task.status status_message = TaskStatusMessage.stopping return ChangeStatusRequest( task=task, new_status=new_status, status_reason=status_reason, status_message=status_message, force=force, ).execute() @staticmethod def get_aggregated_project_parameters( company_id, project_ids: Sequence[str] = None, page: int = 0, page_size: int = 500, ) -> Tuple[int, int, Sequence[dict]]: page = max(0, page) page_size = max(1, page_size) pipeline = [ { "$match": { "company": company_id, "hyperparams": {"$exists": True, "$gt": {}}, **({"project": {"$in": project_ids}} if project_ids else {}), } }, {"$project": {"sections": {"$objectToArray": "$hyperparams"}}}, {"$unwind": "$sections"}, { "$project": { "section": "$sections.k", "names": {"$objectToArray": "$sections.v"}, } }, {"$unwind": "$names"}, {"$group": {"_id": {"section": "$section", "name": "$names.k"}}}, {"$sort": OrderedDict({"_id.section": 1, "_id.name": 1})}, { "$group": { "_id": 1, "total": {"$sum": 1}, "results": {"$push": "$$ROOT"}, } }, { "$project": { "total": 1, "results": {"$slice": ["$results", page * page_size, page_size]}, } }, ] with translate_errors_context(): result = next(Task.aggregate(pipeline), None) total = 0 remaining = 0 results = [] if result: total = int(result.get("total", -1)) results = [ { "section": ParameterKeyEscaper.unescape( dpath.get(r, "_id/section") ), "name": ParameterKeyEscaper.unescape(dpath.get(r, "_id/name")), } for r in result.get("results", []) ] remaining = max(0, total - (len(results) + page * page_size)) return total, remaining, results