Compare commits

22 Commits

Author SHA1 Message Date
allegroai
c8e4d9eeac Fix Dockerfile uses deprecated base image 2023-04-18 10:50:13 +03:00
dependabot[bot]
b51aa5c29b Bump redis from 3.5.3 to 4.4.4 in /apiserver (#190)
Bumps [redis](https://github.com/redis/redis-py) from 3.5.3 to 4.4.4.
- [Release notes](https://github.com/redis/redis-py/releases)
- [Changelog](https://github.com/redis/redis-py/blob/master/CHANGES)
- [Commits](https://github.com/redis/redis-py/compare/3.5.3...v4.4.4)

---
updated-dependencies:
- dependency-name: redis
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2023-04-01 08:59:02 +03:00
allegroai
e7c9daa42b Fix get_task_events to correctly use last_iters for model events 2023-03-28 16:45:44 +03:00
allegroai
7357654249 Version bump to v1.10 2023-03-23 19:17:00 +02:00
allegroai
a6f671b46a Fix typo 2023-03-23 19:16:38 +02:00
allegroai
17a8b440bd Fix only last event of each type is stored per model (all should be stored) 2023-03-23 19:16:30 +02:00
allegroai
eb2b9cbd9a Fix project count for datasets and pipelines 2023-03-23 19:15:42 +02:00
allegroai
797e503e67 Update ES version 2023-03-23 19:14:33 +02:00
allegroai
30cfdac8f2 Fix project preview completed_tasks_24h should not count tasks that are marked as failed or running 2023-03-23 19:13:52 +02:00
allegroai
24bb87aaee Turn on mongo sorting using disk usage by default for sorting in *.get_all* apis 2023-03-23 19:12:52 +02:00
allegroai
dd49ba180a Improve statistics on projects children 2023-03-23 19:11:45 +02:00
allegroai
bda903d0d8 Set API version to 2.24 2023-03-23 19:11:13 +02:00
allegroai
9739eb2d5a Add report_assets field to report tasks 2023-03-23 19:09:03 +02:00
allegroai
cfbb37238f Add default workers timeout to the server's configuration 2023-03-23 19:08:11 +02:00
allegroai
6664c6237e Support querying by children_type in projects.get_all_ex 2023-03-23 19:07:42 +02:00
allegroai
74200a24bd Add filtering on child projects in projects.get_all_ex 2023-03-23 19:06:49 +02:00
john-zielke-snkeos
2fb9288a6c Add env switch to disable nginx ipv6 bind (#165) 2023-03-13 16:05:43 +02:00
shyallegro
5d014d81af Fix #184 and update docker build to include widgets (#185) 2023-03-07 11:26:12 +02:00
allegroai
3a2675abe1 Version bump to v1.9.2 2023-01-24 16:11:21 +02:00
allegroai
f0d68b1ce9 Make sure model label values are integer 2023-01-24 16:11:12 +02:00
allegroai
15db9cdaef Allow updating comments on published reports 2023-01-24 14:40:32 +02:00
Mal Miller
a45d47f5d7 Fix default value of CLEARML_AGENT_UPDATE_VERSION for agent-services (#114) 2023-01-03 13:45:52 +02:00
35 changed files with 659 additions and 159 deletions

View File

@@ -1,3 +1,5 @@
from enum import Enum
from jsonmodels import models, fields
from apiserver.apimodels import ListField, ActualEnumField, DictField
@@ -56,6 +58,12 @@ class ProjectModelMetadataValuesRequest(MultiProjectRequest):
allow_public = fields.BoolField(default=True)
class ProjectChildrenType(Enum):
pipeline = "pipeline"
report = "report"
dataset = "dataset"
class ProjectsGetRequest(models.Base):
include_dataset_stats = fields.BoolField(default=False)
include_stats = fields.BoolField(default=False)
@@ -68,3 +76,4 @@ class ProjectsGetRequest(models.Base):
shallow_search = fields.BoolField(default=False)
search_hidden = fields.BoolField(default=False)
allow_public = fields.BoolField(default=True)
children_type = ActualEnumField(ProjectChildrenType)

View File

@@ -14,6 +14,7 @@ class UpdateReportRequest(Base):
tags = ListField(items_types=[str])
comment = StringField()
report = StringField()
report_assets = ListField(items_types=[str])
class CreateReportRequest(Base):
@@ -22,6 +23,7 @@ class CreateReportRequest(Base):
comment = StringField()
report = StringField()
project = StringField()
report_assets = ListField(items_types=[str])
class PublishReportRequest(Base):

View File

@@ -24,9 +24,7 @@ class WorkerRequest(Base):
class RegisterRequest(WorkerRequest):
timeout = make_default(
IntField, DEFAULT_TIMEOUT
)() # registration timeout in seconds (default is 10min)
timeout = IntField(default=0) # registration timeout in seconds (if not specified, default is 10min)
queues = ListField(six.string_types) # list of queues this worker listens to

View File

@@ -64,7 +64,7 @@ class PlotFields:
class EventBLL(object):
id_fields = ("task", "iter", "metric", "variant", "key")
event_id_fields = ("task", "iter", "metric", "variant", "key")
empty_scroll = "FFFF"
img_source_regex = re.compile(
r"['\"]source['\"]:\s?['\"]([a-z][a-z0-9+\-.]*://.*?)['\"]",
@@ -219,13 +219,10 @@ class EventBLL(object):
# force iter to be a long int
iter = event.get("iter")
if iter is not None:
if model_events:
iter = 0
else:
iter = int(iter)
if iter > MAX_LONG or iter < MIN_LONG:
errors_per_type[invalid_iteration_error] += 1
continue
iter = int(iter)
if iter > MAX_LONG or iter < MIN_LONG:
errors_per_type[invalid_iteration_error] += 1
continue
event["iter"] = iter
# used to have "values" to indicate array. no need anymore
@@ -487,7 +484,7 @@ class EventBLL(object):
)
def _get_event_id(self, event):
id_values = (str(event[field]) for field in self.id_fields if field in event)
id_values = (str(event[field]) for field in self.event_id_fields if field in event)
return hashlib.md5("-".join(id_values).encode()).hexdigest()
def scroll_task_events(
@@ -717,7 +714,6 @@ class EventBLL(object):
size=500,
scroll_id=None,
no_scroll=False,
model_events=False,
) -> TaskEventsResult:
if scroll_id == self.empty_scroll:
return TaskEventsResult()
@@ -746,7 +742,7 @@ class EventBLL(object):
if metrics:
must.append(get_metric_variants_condition(metrics))
if last_iter_count is None or model_events:
if last_iter_count is None:
must.append({"terms": {"task": task_ids}})
else:
tasks_iters = self.get_last_iters(

View File

@@ -1,8 +1,7 @@
import itertools
from collections import defaultdict
from datetime import datetime, timedelta
from functools import reduce
from itertools import groupby
from itertools import groupby, chain
from operator import itemgetter
from typing import (
Sequence,
@@ -22,6 +21,7 @@ from mongoengine import Q, Document
from apiserver import database
from apiserver.apierrors import errors
from apiserver.apimodels.projects import ProjectChildrenType
from apiserver.config_repo import config
from apiserver.database.model import EntityVisibility, AttributedDocument
from apiserver.database.model.base import GetMixin
@@ -40,13 +40,22 @@ from .sub_projects import (
_ids_with_children,
_ids_with_parents,
_get_project_depth,
ProjectsChildren,
)
log = config.logger(__file__)
max_depth = config.get("services.projects.sub_projects.max_depth", 10)
reports_project_name = ".reports"
datasets_project_name = ".datasets"
pipelines_project_name = ".pipelines"
reports_tag = "reports"
dataset_tag = "dataset"
pipeline_tag = "pipeline"
class ProjectBLL:
child_classes = (Task, Model)
@classmethod
def merge_project(
cls, company, source_id: str, destination_id: str
@@ -81,7 +90,7 @@ class ProjectBLL:
)
moved_entities = 0
for entity_type in (Task, Model):
for entity_type in cls.child_classes:
moved_entities += entity_type.objects(
company=company,
project=source_id,
@@ -396,6 +405,18 @@ class ProjectBLL:
"$completed",
{"$gt": ["$completed", time_thresh]},
additional_cond,
{
"$not": {
"$in": [
"$status",
[
TaskStatus.queued,
TaskStatus.in_progress,
TaskStatus.failed,
],
]
}
},
]
},
"then": 1,
@@ -509,7 +530,7 @@ class ProjectBLL:
def aggregate_project_data(
func: Callable[[T, T], T],
project_ids: Sequence[str],
child_projects: Mapping[str, Sequence[Project]],
child_projects: ProjectsChildren,
data: Mapping[str, T],
) -> Dict[str, T]:
"""
@@ -561,6 +582,136 @@ class ProjectBLL:
for r in Task.aggregate(task_runtime_pipeline)
}
@staticmethod
def _get_projects_children(
project_ids: Sequence[str], search_hidden: bool, allowed_ids: Sequence[str],
) -> Tuple[ProjectsChildren, Set[str]]:
child_projects = _get_sub_projects(
project_ids,
_only=("id", "name"),
search_hidden=search_hidden,
allowed_ids=allowed_ids,
)
return (
child_projects,
{c.id for c in chain.from_iterable(child_projects.values())},
)
@staticmethod
def _get_children_info(
project_ids: Sequence[str], child_projects: ProjectsChildren
) -> dict:
return {
project: sorted(
[{"id": c.id, "name": c.name} for c in child_projects.get(project, [])],
key=itemgetter("name"),
)
for project in project_ids
}
@classmethod
def _get_project_dataset_stats_core(
cls,
company: str,
project_ids: Sequence[str],
project_field: str,
entity_class: Type[AttributedDocument],
include_children: bool = True,
filter_: Mapping[str, Any] = None,
users: Sequence[str] = None,
selected_project_ids: Sequence[str] = None,
) -> Tuple[Dict[str, dict], Dict[str, dict]]:
if not project_ids:
return {}, {}
child_projects = {}
project_ids_with_children = set(project_ids)
if include_children:
child_projects, children_ids = cls._get_projects_children(
project_ids, search_hidden=True, allowed_ids=selected_project_ids,
)
project_ids_with_children |= children_ids
pipeline = [
{
"$match": cls.get_match_conditions(
company=company,
project_ids=list(project_ids_with_children),
filter_=filter_,
users=users,
project_field=project_field,
)
},
{"$project": {project_field: 1, "tags": 1}},
{
"$group": {
"_id": f"${project_field}",
"count": {"$sum": 1},
"tags": {"$push": "$tags"},
}
},
]
res = entity_class.aggregate(pipeline)
project_stats = {
result["_id"]: {
"count": result.get("count", 0),
"tags": set(chain.from_iterable(result.get("tags", []))),
}
for result in res
}
def concat_dataset_stats(a: dict, b: dict) -> dict:
return {
"count": a.get("count", 0) + b.get("count", 0),
"tags": a.get("tags", {}) | b.get("tags", {}),
}
top_project_stats = cls.aggregate_project_data(
func=concat_dataset_stats,
project_ids=project_ids,
child_projects=child_projects,
data=project_stats,
)
for _, stat in top_project_stats.items():
stat["tags"] = sorted(list(stat.get("tags", {})))
empty_stats = {"count": 0, "tags": []}
stats = {
project: {"datasets": top_project_stats.get(project, empty_stats)}
for project in project_ids
}
return stats, cls._get_children_info(project_ids, child_projects)
@classmethod
def get_project_dataset_stats(
cls,
company: str,
project_ids: Sequence[str],
include_children: bool = True,
filter_: Mapping[str, Any] = None,
users: Sequence[str] = None,
selected_project_ids: Sequence[str] = None,
) -> Tuple[Dict[str, dict], Dict[str, dict]]:
filter_ = filter_ or {}
filter_system_tags = filter_.get("system_tags")
if not isinstance(filter_system_tags, list):
filter_system_tags = []
if dataset_tag not in filter_system_tags:
filter_system_tags.append(dataset_tag)
filter_["system_tags"] = filter_system_tags
return cls._get_project_dataset_stats_core(
company=company,
project_ids=project_ids,
project_field="parent",
entity_class=Project,
include_children=include_children,
filter_=filter_,
users=users,
selected_project_ids=selected_project_ids,
)
@classmethod
def get_project_stats(
cls,
@@ -571,24 +722,21 @@ class ProjectBLL:
search_hidden: bool = False,
filter_: Mapping[str, Any] = None,
users: Sequence[str] = None,
user_active_project_ids: Sequence[str] = None,
selected_project_ids: Sequence[str] = None,
) -> Tuple[Dict[str, dict], Dict[str, dict]]:
if not project_ids:
return {}, {}
child_projects = (
_get_sub_projects(
child_projects = {}
project_ids_with_children = set(project_ids)
if include_children:
child_projects, children_ids = cls._get_projects_children(
project_ids,
_only=("id", "name"),
search_hidden=search_hidden,
allowed_ids=user_active_project_ids,
allowed_ids=selected_project_ids,
)
if include_children
else {}
)
project_ids_with_children = set(project_ids) | {
c.id for c in itertools.chain.from_iterable(child_projects.values())
}
project_ids_with_children |= children_ids
status_count_pipeline, runtime_pipeline = cls.make_projects_get_all_pipelines(
company,
project_ids=list(project_ids_with_children),
@@ -692,14 +840,7 @@ class ProjectBLL:
for project in project_ids
}
children = {
project: sorted(
[{"id": c.id, "name": c.name} for c in child_projects.get(project, [])],
key=itemgetter("name"),
)
for project in project_ids
}
return stats, children
return stats, cls._get_children_info(project_ids, child_projects)
@classmethod
def get_active_users(
@@ -724,7 +865,7 @@ class ProjectBLL:
projects_query &= Q(id__in=project_ids)
res = set(Project.objects(projects_query).distinct(field="user"))
for cls_ in (Task, Model):
for cls_ in cls.child_classes:
res |= set(cls_.objects(query).distinct(field="user"))
return res
@@ -753,46 +894,81 @@ class ProjectBLL:
return tags, system_tags
@classmethod
def get_projects_with_active_user(
def get_projects_with_selected_children(
cls,
company: str,
users: Sequence[str],
users: Sequence[str] = None,
project_ids: Optional[Sequence[str]] = None,
allow_public: bool = True,
children_type: ProjectChildrenType = None,
) -> Tuple[Sequence[str], Sequence[str]]:
"""
Get the projects ids where user created any tasks including all the parents of these projects
Get the projects ids matching children_condition (if passed) or where the passed user created any tasks
including all the parents of these projects
If project ids are specified then filter the results by these project ids
"""
query = Q(user__in=users)
if not (users or children_type):
raise errors.bad_request.ValidationError(
"Either active users or children_condition should be specified"
)
if allow_public:
query &= get_company_or_none_constraint(company)
query = (
get_company_or_none_constraint(company)
if allow_public
else Q(company=company)
)
if users:
query &= Q(user__in=users)
project_query = None
if children_type == ProjectChildrenType.dataset:
child_queries = {
Project: query
& Q(system_tags__in=[dataset_tag], basename__ne=datasets_project_name)
}
elif children_type == ProjectChildrenType.pipeline:
child_queries = {Task: query & Q(system_tags__in=[pipeline_tag])}
elif children_type == ProjectChildrenType.report:
child_queries = {Task: query & Q(system_tags__in=[reports_tag])}
else:
query &= Q(company=company)
project_query = query
child_queries = {entity_cls: query for entity_cls in cls.child_classes}
user_projects_query = query
if project_ids:
ids_with_children = _ids_with_children(project_ids)
query &= Q(project__in=ids_with_children)
user_projects_query &= Q(id__in=ids_with_children)
if project_query:
project_query &= Q(id__in=ids_with_children)
for child_cls in child_queries:
child_queries[child_cls] &= (
Q(parent__in=ids_with_children)
if child_cls is Project
else Q(project__in=ids_with_children)
)
res = {p.id for p in Project.objects(user_projects_query).only("id")}
for cls_ in (Task, Model):
res |= set(cls_.objects(query).distinct(field="project"))
res = (
{p.id for p in Project.objects(project_query).only("id")}
if project_query
else set()
)
for cls_, query_ in child_queries.items():
res |= set(
cls_.objects(query_).distinct(
field="parent" if cls_ is Project else "project"
)
)
res = list(res)
if not res:
return res, res
user_active_project_ids = _ids_with_parents(res)
selected_project_ids = _ids_with_parents(res)
filtered_ids = (
list(set(user_active_project_ids) & set(project_ids))
list(set(selected_project_ids) & set(project_ids))
if project_ids
else list(user_active_project_ids)
else list(selected_project_ids)
)
return filtered_ids, user_active_project_ids
return filtered_ids, selected_project_ids
@classmethod
def get_task_parents(
@@ -865,10 +1041,11 @@ class ProjectBLL:
project_ids: Sequence[str],
filter_: Mapping[str, Any],
users: Sequence[str],
project_field: str = "project",
):
conditions = {
"company": {"$in": [None, "", company]},
"project": {"$in": project_ids},
project_field: {"$in": project_ids},
}
if users:
conditions["user"] = {"$in": users}
@@ -893,6 +1070,69 @@ class ProjectBLL:
return conditions
@classmethod
def _calc_own_datasets_core(
cls,
company: str,
project_ids: Sequence[str],
project_field: str,
entity_class: Type[AttributedDocument],
filter_: Mapping[str, Any] = None,
users: Sequence[str] = None,
) -> Dict[str, dict]:
"""
Returns the amount of hyper datasets per requested project
"""
if not project_ids:
return {}
pipeline = [
{
"$match": cls.get_match_conditions(
company=company,
project_ids=project_ids,
filter_=filter_,
users=users,
project_field=project_field,
)
},
{"$project": {project_field: 1}},
{"$group": {"_id": f"${project_field}", "count": {"$sum": 1}}},
]
datasets = {
data["_id"]: data["count"] for data in entity_class.aggregate(pipeline)
}
return {pid: {"own_datasets": datasets.get(pid, 0)} for pid in project_ids}
@classmethod
def calc_own_datasets(
cls,
company: str,
project_ids: Sequence[str],
filter_: Mapping[str, Any] = None,
users: Sequence[str] = None,
) -> Dict[str, dict]:
"""
Returns the amount of datasets per requested project
"""
filter_ = filter_ or {}
filter_system_tags = filter_.get("system_tags")
if not isinstance(filter_system_tags, list):
filter_system_tags = []
if dataset_tag not in filter_system_tags:
filter_system_tags.append(dataset_tag)
filter_["system_tags"] = filter_system_tags
return cls._calc_own_datasets_core(
company=company,
project_ids=project_ids,
project_field="parent",
entity_class=Project,
filter_=filter_,
users=users,
)
@classmethod
def calc_own_contents(
cls,

View File

@@ -1,3 +1,4 @@
from collections import defaultdict
from typing import Tuple, Set, Sequence
import attr
@@ -15,6 +16,7 @@ from apiserver.database.model import EntityVisibility
from apiserver.database.model.model import Model
from apiserver.database.model.project import Project
from apiserver.database.model.task.task import Task, ArtifactModes, TaskType
from .project_bll import ProjectBLL
from .sub_projects import _ids_with_children
log = config.logger(__file__)
@@ -40,9 +42,9 @@ def validate_project_delete(company: str, project_id: str):
is_pipeline = "pipeline" in (project.system_tags or [])
project_ids = _ids_with_children([project_id])
ret = {}
for cls in (Task, Model):
for cls in ProjectBLL.child_classes:
ret[f"{cls.__name__.lower()}s"] = cls.objects(project__in=project_ids).count()
for cls in (Task, Model):
for cls in ProjectBLL.child_classes:
query = dict(
project__in=project_ids, system_tags__nin=[EntityVisibility.archived.value]
)
@@ -98,9 +100,10 @@ def delete_project(
)
if not delete_contents:
for cls in (Model, Task):
updated_count = cls.objects(project__in=project_ids).update(project=None)
res = DeleteProjectResult(disassociated_tasks=updated_count)
disassociated = defaultdict(int)
for cls in ProjectBLL.child_classes:
disassociated[cls] = cls.objects(project__in=project_ids).update(project=None)
res = DeleteProjectResult(disassociated_tasks=disassociated[Task])
else:
deleted_models, model_event_urls, model_urls = _delete_models(
company=company, projects=project_ids

View File

@@ -14,14 +14,16 @@ def _get_project_depth(project_name: str) -> int:
return len(list(filter(None, project_name.split(name_separator))))
def _validate_project_name(project_name: str) -> Tuple[str, str]:
def _validate_project_name(project_name: str, raise_if_empty=True) -> Tuple[str, str]:
"""
Remove redundant '/' characters. Ensure that the project name is not empty
Return the cleaned up project name and location
"""
name_parts = list(filter(None, project_name.split(name_separator)))
name_parts = [p.strip() for p in project_name.split(name_separator) if p]
if not name_parts:
raise errors.bad_request.InvalidProjectName(name=project_name)
if raise_if_empty:
raise errors.bad_request.InvalidProjectName(name=project_name)
return "", ""
return name_separator.join(name_parts), name_separator.join(name_parts[:-1])
@@ -34,7 +36,7 @@ def _ensure_project(
If needed auto-create the project and all the missing projects in the path to it
Return the project
"""
name = name.strip(name_separator)
name, location = _validate_project_name(name, raise_if_empty=False)
if not name:
return None
@@ -43,7 +45,6 @@ def _ensure_project(
return project
now = datetime.utcnow()
name, location = _validate_project_name(name)
project = Project(
id=database.utils.id(),
user=user,
@@ -101,12 +102,15 @@ def _get_writable_project_from_name(
return qs.first()
ProjectsChildren = Mapping[str, Sequence[Project]]
def _get_sub_projects(
project_ids: Sequence[str],
_only: Sequence[str] = ("id", "path"),
search_hidden=True,
allowed_ids: Sequence[str] = None,
) -> Mapping[str, Sequence[Project]]:
) -> ProjectsChildren:
"""
Return the list of child projects of all the levels for the parent project ids
"""
@@ -159,14 +163,14 @@ def _update_subproject_names(
now = datetime.utcnow()
for child in children:
child_suffix = name_separator.join(
child.name.split(name_separator)[len(old_name.split(name_separator)) :]
child.name.split(name_separator)[len(old_name.split(name_separator)):]
)
updates = {
"name": name_separator.join((project.name, child_suffix)),
"last_update": now,
}
if update_path:
updates["path"] = project.path + child.path[len(old_path) :]
updates["path"] = project.path + child.path[len(old_path):]
updated += child.update(upsert=False, **updates)
return updated

View File

@@ -256,6 +256,16 @@ class TaskBLL:
not in [TaskSystemTags.development, EntityVisibility.archived.value]
]
def ensure_int_labels(execution: dict) -> dict:
if not execution:
return execution
model_labels = execution.get("model_labels")
if model_labels:
execution["model_labels"] = {k: int(v) for k, v in model_labels.items()}
return execution
parent_task = (
task.parent
if task.parent and not task.parent.startswith(deleted_prefix)
@@ -280,7 +290,7 @@ class TaskBLL:
output=Output(destination=task.output.destination) if task.output else None,
models=Models(input=input_models or task.models.input),
container=escape_dict(container) or task.container,
execution=execution_dict,
execution=ensure_int_labels(execution_dict),
configuration=params_dict.get("configuration") or task.configuration,
hyperparams=params_dict.get("hyperparams") or task.hyperparams,
)

View File

@@ -41,10 +41,6 @@
# controls whether FieldDoesNotExist exception will be raised for any extra attribute existing in stored data
# but not declared in a data model
strict: false
aggregate {
allow_disk_use: true
}
}
elastic {
@@ -117,6 +113,10 @@
# Timeout in seconds on task status update. If exceeded
# then task can be stopped without communicating to the worker
task_update_timeout: 600
# Timeout in seconds for worker registration (or status report). If a worker did not report for this long,
# it is discarded from the server's table
default_timeout: 600
}
check_for_updates {

View File

@@ -2,3 +2,8 @@ max_page_size: 500
# expiration time in seconds for the redis scroll states in get_many family of apis
scroll_state_expiration_seconds: 600
allow_disk_use {
sort: true
aggregate: true
}

View File

@@ -17,7 +17,7 @@ from typing import (
from boltons.iterutils import first, partition
from dateutil.parser import parse as parse_datetime
from mongoengine import Q, Document, ListField, StringField, IntField
from mongoengine import Q, Document, ListField, StringField, IntField, QuerySet
from pymongo.command_cursor import CommandCursor
from apiserver.apierrors import errors, APIError
@@ -39,7 +39,7 @@ from apiserver.redis_manager import redman
from apiserver.utilities.dicts import project_dict, exclude_fields_from_dict
log = config.logger("dbmodel")
mongo_conf = config.get("services._mongo")
ACCESS_REGEX = re.compile(r"^(?P<prefix>>=|>|<=|<)?(?P<value>.*)$")
ACCESS_MODIFIER = {">=": "gte", ">": "gt", "<=": "lte", "<": "lt"}
@@ -158,7 +158,9 @@ class GetMixin(PropsMixin):
def _get_op(self, v: str, translate: bool = False) -> Optional[str]:
try:
op = (
v[len(self.op_prefix) :] if v and v.startswith(self.op_prefix) else None
v[len(self.op_prefix) :]
if v and v.startswith(self.op_prefix)
else None
)
if translate:
tup = self._ops.get(op, None)
@@ -166,7 +168,9 @@ class GetMixin(PropsMixin):
return op
except AttributeError:
raise errors.bad_request.FieldsValueError(
"invalid value type, string expected", field=self._field, value=str(v)
"invalid value type, string expected",
field=self._field,
value=str(v),
)
def _key(self, v) -> Optional[Union[str, bool]]:
@@ -233,8 +237,8 @@ class GetMixin(PropsMixin):
cls._cache_manager = RedisCacheManager(
state_class=cls.GetManyScrollState,
redis=redman.connection("apiserver"),
expiration_interval=config.get(
"services._mongo.scroll_state_expiration_seconds", 600
expiration_interval=mongo_conf.get(
"scroll_state_expiration_seconds", 600
),
)
@@ -451,7 +455,9 @@ class GetMixin(PropsMixin):
raise
except Exception as ex:
raise errors.bad_request.FieldsValueError(
"failed parsing query field", error=str(ex), **({"field": field} if field else {})
"failed parsing query field",
error=str(ex),
**({"field": field} if field else {}),
)
return query & RegexQ(**dict_query)
@@ -570,7 +576,7 @@ class GetMixin(PropsMixin):
if start is not None:
return start, cls.validate_scroll_size(parameters)
max_page_size = config.get("services._mongo.max_page_size", 500)
max_page_size = mongo_conf.get("max_page_size", 500)
page = parameters.get("page", default_page)
if page is not None and page < 0:
raise errors.bad_request.ValidationError("page must be >=0", field="page")
@@ -880,6 +886,13 @@ class GetMixin(PropsMixin):
return cls._get_many_no_company(query=_query, override_projection=projection)
@staticmethod
def _get_qs_with_ordering(qs: QuerySet, order_by: Sequence):
disk_use_setting = mongo_conf.get("allow_disk_use.sort", None)
if disk_use_setting is not None:
qs = qs.allow_disk_use(disk_use_setting)
return qs.order_by(*order_by)
@classmethod
def _get_many_no_company(
cls: Union["GetMixin", Document],
@@ -921,7 +934,7 @@ class GetMixin(PropsMixin):
qs = qs.search_text(search_text)
if order_by:
# add ordering
qs = qs.order_by(*order_by)
qs = cls._get_qs_with_ordering(qs, order_by)
if include:
# add projection
@@ -1013,7 +1026,7 @@ class GetMixin(PropsMixin):
res = cls._get_queries_for_order_field(query, order_field)
if res:
query_sets = [cls.objects(q) for q in res]
query_sets = [qs.order_by(*order_by) for qs in query_sets]
query_sets = [cls._get_qs_with_ordering(qs, order_by) for qs in query_sets]
if order_field and not override_collation:
override_collation = cls._get_collation_override(order_field)
@@ -1173,7 +1186,7 @@ class DbModelMixin(GetMixin, ProperDictMixin, UpdateMixin):
kwargs.update(
allowDiskUse=allow_disk_use
if allow_disk_use is not None
else config.get("apiserver.mongo.aggregate.allow_disk_use", True)
else mongo_conf.get("allow_disk_use.aggregate", True)
)
return cls.objects.aggregate(pipeline, **kwargs)

View File

@@ -19,6 +19,7 @@ from apiserver.database.fields import (
SafeSortedListField,
EmbeddedDocumentListField,
NullableStringField,
NoneType,
)
from apiserver.database.model import AttributedDocument
from apiserver.database.model.base import ProperDictMixin, GetMixin
@@ -89,7 +90,9 @@ class Artifact(EmbeddedDocument):
content_size = LongField()
timestamp = LongField()
type_data = EmbeddedDocumentField(ArtifactTypeData)
display_data = SafeSortedListField(ListField(UnionField((int, float, str))))
display_data = SafeSortedListField(
ListField(UnionField((int, float, str, NoneType)))
)
class ParamsItem(EmbeddedDocument, ProperDictMixin):
@@ -231,6 +234,7 @@ class Task(AttributedDocument):
range_fields=("started", "active_duration", "last_metrics.*", "last_iteration"),
datetime_fields=("status_changed", "last_update"),
pattern_fields=("name", "comment", "report"),
fields=("execution.queue", "runtime.*", "models.input.model"),
)
id = StringField(primary_key=True)
@@ -245,6 +249,7 @@ class Task(AttributedDocument):
status_changed = DateTimeField()
comment = StringField(user_set_allowed=True)
report = StringField()
report_assets = ListField(StringField())
created = DateTimeField(required=True, user_set_allowed=True)
started = DateTimeField()
completed = DateTimeField()

View File

@@ -67,7 +67,7 @@ class MissingPasswordForElasticUser(Exception):
class ESFactory:
@classmethod
def connect(cls, cluster_name):
def connect(cls, cluster_name) -> Elasticsearch:
"""
Returns the es client for the cluster.
Connects to the cluster if did not connect previously

View File

@@ -29,7 +29,7 @@ pyhocon>=0.3.35
pyjwt>=2.4.0
pymongo[srv]==3.12.0
python-rapidjson>=0.6.3
redis==3.5.3
redis==4.4.4
redis-py-cluster>=2.1.3
requests>=2.13.0
semantic_version>=2.8.3,<3

View File

@@ -61,12 +61,26 @@ _definitions {
type: string
}
last_update {
description: """Last project update time. Reflects the last time the project metadata was changed or a task in this project has changed status"""
description: "Last project update time. Reflects the last time the project metadata was changed or a task in this project has changed status"
type: string
format: "date-time"
}
}
}
stats_datasets {
type: object
properties {
count {
description: Number of datasets
type: integer
}
tags {
description: Dataset tags
type: array
items {type: string}
}
}
}
stats_status_count {
type: object
properties {
@@ -141,6 +155,10 @@ _definitions {
description: "Stats for archived tasks"
"$ref": "#/definitions/stats_status_count"
}
datasets {
description: "Stats for childrent datasets"
"$ref": "#/definitions/stats_datasets"
}
}
}
projects_get_all_response_single {
@@ -191,11 +209,15 @@ _definitions {
type: string
}
last_update {
description: """Last project update time. Reflects the last time the project metadata was changed or a task in this project has changed status"""
description: "Last project update time. Reflects the last time the project metadata was changed or a task in this project has changed status"
type: string
format: "date-time"
}
// extra properties
hidden {
description: "Returned if the search_hidden flag was specified in the get_all_ex call and the project is hidden"
type: boolean
}
stats {
description: "Additional project stats"
"$ref": "#/definitions/stats"
@@ -217,6 +239,10 @@ _definitions {
}
}
}
own_datasets {
description: "The amount of datasets/hyperdatasers under this project (without children projects). Returned if 'check_own_contents' flag is set in the request and children_type is set to 'dataset' or 'hyperdataset'"
type: integer
}
own_tasks {
description: "The amount of tasks under this project (without children projects). Returned if 'check_own_contents' flag is set in the request"
type: integer
@@ -620,6 +646,13 @@ get_all_ex {
}
}
}
"2.24": ${get_all_ex."2.23"} {
request.properties.children_type {
description: If specified that only the projects under which the entities of this type can be found will be returned
type: string
enum: [pipeline, report, dataset]
}
}
}
update {
"2.1" {

View File

@@ -55,6 +55,11 @@ _definitions {
description: "Report template"
type: string
}
report_assets {
description: "List of the external report assets"
type: array
items { type: string }
}
created {
description: "Report creation time (UTC) "
type: string
@@ -146,6 +151,13 @@ create {
}
}
}
"2.24": ${create."2.23"} {
request.properties.report_assets {
description: "List of the external report assets"
type: array
items { type: string }
}
}
}
update {
"2.23" {
@@ -181,6 +193,13 @@ update {
}
response: ${_definitions.update_response}
}
"2.24": ${update."2.23"} {
request.properties.report_assets {
description: "List of the external report assets"
type: array
items { type: string }
}
}
}
move {
"2.23" {

View File

@@ -39,7 +39,7 @@ class ServiceRepo(object):
"""If the check is set, parsing will fail for endpoint request with the version that is grater than the current
maximum """
_max_version = PartialVersion("2.23")
_max_version = PartialVersion("2.24")
""" Maximum version number (the highest min_version value across all endpoints) """
_endpoint_exp = (

View File

@@ -410,7 +410,6 @@ def get_scalar_metric_data(call, company_id, _):
metrics={metric: []},
scroll_id=scroll_id,
no_scroll=no_scroll,
model_events=model_events,
)
call.result.data = dict(
@@ -576,7 +575,6 @@ def _get_multitask_plots(
sort=[{"iter": {"order": "desc"}}],
scroll_id=scroll_id,
no_scroll=no_scroll,
model_events=model_events,
)
return_events = _get_top_iter_unique_events_per_task(
result.events, max_iters=last_iters, task_names=task_names
@@ -786,7 +784,6 @@ def get_debug_images_v1_8(call, company_id, _):
sort=[{"iter": {"order": "desc"}}],
last_iter_count=iters,
scroll_id=scroll_id,
model_events=model_events,
)
return_events = result.events

View File

@@ -76,7 +76,7 @@ def get_entities_count(call: APICall, company, request: EntitiesCountRequest):
requested_ids = data.get("id")
if isinstance(requested_ids, str):
requested_ids = [requested_ids]
ids, _ = project_bll.get_projects_with_active_user(
ids, _ = project_bll.get_projects_with_selected_children(
company=company,
users=request.active_users,
project_ids=requested_ids,
@@ -92,7 +92,7 @@ def get_entities_count(call: APICall, company, request: EntitiesCountRequest):
query = Q()
if (
entity_cls in (Project, Task)
and field != "reports"
and field not in ("reports", "pipelines", "datasets")
and not request.search_hidden
):
query &= Q(system_tags__ne=EntityVisibility.hidden.value)

View File

@@ -1,4 +1,4 @@
from typing import Sequence
from typing import Sequence, Optional, Tuple
import attr
from mongoengine import Q
@@ -18,9 +18,11 @@ from apiserver.apimodels.projects import (
ProjectOrNoneRequest,
ProjectRequest,
ProjectModelMetadataValuesRequest,
ProjectChildrenType,
)
from apiserver.bll.organization import OrgBLL, Tags
from apiserver.bll.project import ProjectBLL, ProjectQueries
from apiserver.bll.project.project_bll import pipeline_tag, reports_tag
from apiserver.bll.project.project_cleanup import (
delete_project,
validate_project_delete,
@@ -28,6 +30,7 @@ from apiserver.bll.project.project_cleanup import (
from apiserver.database.errors import translate_errors_context
from apiserver.database.model import EntityVisibility
from apiserver.database.model.project import Project
from apiserver.database.model.task.task import TaskType
from apiserver.database.utils import (
parse_from_call,
get_company_or_none_constraint,
@@ -96,6 +99,18 @@ def _adjust_search_parameters(data: dict, shallow_search: bool):
data["parent"] = [None]
def _get_project_stats_filter(request: ProjectsGetRequest) -> Tuple[Optional[dict], bool]:
if request.include_stats_filter or not request.children_type:
return request.include_stats_filter, request.search_hidden
if request.children_type == ProjectChildrenType.pipeline:
return {"system_tags": [pipeline_tag], "type": [TaskType.controller]}, True
if request.children_type == ProjectChildrenType.report:
return {"system_tags": [reports_tag], "type": [TaskType.report]}, True
return request.include_stats_filter, request.search_hidden
@endpoint("projects.get_all_ex", request_data_model=ProjectsGetRequest)
def get_all_ex(call: APICall, company_id: str, request: ProjectsGetRequest):
data = call.data
@@ -114,19 +129,28 @@ def get_all_ex(call: APICall, company_id: str, request: ProjectsGetRequest):
_adjust_search_parameters(
data, shallow_search=request.shallow_search,
)
user_active_project_ids = None
if request.active_users:
ids, user_active_project_ids = project_bll.get_projects_with_active_user(
selected_project_ids = None
if request.active_users or request.children_type:
ids, selected_project_ids = project_bll.get_projects_with_selected_children(
company=company_id,
users=request.active_users,
project_ids=requested_ids,
allow_public=allow_public,
children_type=request.children_type,
)
if not ids:
return {"projects": []}
data["id"] = ids
ret_params = {}
remove_system_tags = False
if request.search_hidden:
only_fields = data.get("only_fields")
if isinstance(only_fields, list) and "system_tags" not in only_fields:
only_fields.append("system_tags")
remove_system_tags = True
projects: Sequence[dict] = Project.get_many_with_join(
company=company_id,
query_dict=data,
@@ -137,29 +161,60 @@ def get_all_ex(call: APICall, company_id: str, request: ProjectsGetRequest):
if not projects:
return {"projects": projects, **ret_params}
if request.search_hidden:
for p in projects:
system_tags = (
p.pop("system_tags", [])
if remove_system_tags
else p.get("system_tags", [])
)
if EntityVisibility.hidden.value in system_tags:
p["hidden"] = True
conform_output_tags(call, projects)
project_ids = list({project["id"] for project in projects})
if request.check_own_contents:
contents = project_bll.calc_own_contents(
company=company_id,
project_ids=project_ids,
filter_=request.include_stats_filter,
users=request.active_users,
)
if request.children_type == ProjectChildrenType.dataset:
contents = project_bll.calc_own_datasets(
company=company_id,
project_ids=project_ids,
filter_=request.include_stats_filter,
users=request.active_users,
)
else:
contents = project_bll.calc_own_contents(
company=company_id,
project_ids=project_ids,
filter_=_get_project_stats_filter(request)[0],
users=request.active_users,
)
for project in projects:
project.update(**contents.get(project["id"], {}))
conform_output_tags(call, projects)
if request.include_stats:
stats, children = project_bll.get_project_stats(
company=company_id,
project_ids=project_ids,
specific_state=request.stats_for_state,
include_children=request.stats_with_children,
search_hidden=request.search_hidden,
filter_=request.include_stats_filter,
users=request.active_users,
user_active_project_ids=user_active_project_ids,
)
if request.children_type == ProjectChildrenType.dataset:
stats, children = project_bll.get_project_dataset_stats(
company=company_id,
project_ids=project_ids,
include_children=request.stats_with_children,
filter_=request.include_stats_filter,
users=request.active_users,
selected_project_ids=selected_project_ids,
)
else:
filter_, search_hidden = _get_project_stats_filter(request)
stats, children = project_bll.get_project_stats(
company=company_id,
project_ids=project_ids,
specific_state=request.stats_for_state,
include_children=request.stats_with_children,
search_hidden=search_hidden,
filter_=filter_,
users=request.active_users,
selected_project_ids=selected_project_ids,
)
for project in projects:
project["stats"] = stats[project["id"]]

View File

@@ -16,6 +16,7 @@ from apiserver.apimodels.reports import (
)
from apiserver.apierrors import errors
from apiserver.apimodels.base import UpdateResponse
from apiserver.bll.project.project_bll import reports_project_name, reports_tag
from apiserver.services.utils import process_include_subprojects, sort_tags_response
from apiserver.bll.organization import OrgBLL
from apiserver.bll.project import ProjectBLL
@@ -42,13 +43,12 @@ project_bll = ProjectBLL()
task_bll = TaskBLL()
reports_project_name = ".reports"
reports_tag = "reports"
update_fields = {
"name",
"tags",
"comment",
"report",
"report_assets",
}
@@ -80,7 +80,9 @@ def update_report(call: APICall, company_id: str, request: UpdateReportRequest):
if not partial_update_dict:
return UpdateResponse(updated=0)
allowed_for_published = set(partial_update_dict.keys()).issubset({"tags", "name"})
allowed_for_published = set(partial_update_dict.keys()).issubset(
{"tags", "name", "comment"}
)
if task.status != TaskStatus.created and not allowed_for_published:
raise errors.bad_request.InvalidTaskStatus(
expected=TaskStatus.created, status=task.status

View File

@@ -56,6 +56,9 @@ def register(call: APICall, company_id, request: RegisterRequest):
timeout = request.timeout
queues = request.queues
if not timeout:
timeout = config.get("apiserver.workers.default_timeout", 10 * 60)
if not timeout or timeout <= 0:
raise bad_request.WorkerRegistrationFailed(
"invalid timeout", timeout=timeout, worker=worker

View File

@@ -46,7 +46,12 @@ class TestReports(TestService):
# update is working on draft reports
new_comment = "My new comment"
res = self.api.reports.update(task=task_id, comment=new_comment, tags=[])
res = self.api.reports.update(
task=task_id,
comment=new_comment,
tags=[],
report_assets=["file://test.jpg"],
)
self.assertEqual(res.updated, 1)
task = self.api.tasks.get_all_ex(id=[task_id]).tasks[0]
self.assertEqual(task.name, task_name)
@@ -54,15 +59,19 @@ class TestReports(TestService):
self.assertEqual(task.tags, [])
ret = self.api.reports.get_tags()
self.assertEqual(ret.tags, [])
self.assertEqual(task.report_assets, ["file://test.jpg"])
self.api.reports.publish(task=task_id)
with self.api.raises(errors.bad_request.InvalidTaskStatus):
self.api.reports.update(task=task_id, comment=comment)
self.api.reports.update(task=task_id, report="New report text")
# update on tags or rename can be done for published report too
self.api.reports.update(task=task_id, name="new name", tags=["test"])
self.api.reports.update(
task=task_id, name="new name", tags=["test"], comment="Yet another comment"
)
task = self.api.tasks.get_all_ex(id=[task_id]).tasks[0]
self.assertEqual(task.tags, ["test"])
self.assertEqual(task.name, "new name")
self.assertEqual(task.comment, "Yet another comment")
# move under another project autodeletes the empty project
new_project_name = "Reports Test"
@@ -132,8 +141,7 @@ class TestReports(TestService):
self.send_batch([*debug_image_events, *plot_events])
res = self.api.reports.get_task_data(
id=[non_report_task],
only_fields=["name"],
id=[non_report_task], only_fields=["name"],
)
self.assertEqual(len(res.tasks), 1)
self.assertEqual(res.tasks[0].id, non_report_task)

View File

@@ -33,6 +33,72 @@ class TestSubProjects(TestService):
).projects[0]
self.assertEqual(data.dataset_stats, {"file_count": 2, "total_size": 1000})
def test_query_children(self):
test_root_name = "TestQueryChildren"
test_root = self._temp_project(name=test_root_name)
dataset_tags = ["hello", "world"]
dataset_project = self._temp_project(
name=f"{test_root_name}/Project1/Dataset",
system_tags=["dataset"],
tags=dataset_tags,
)
self._temp_task(
name="dataset task",
type="data_processing",
system_tags=["dataset"],
project=dataset_project,
)
self._temp_task(name="regular task", project=dataset_project)
pipeline_project = self._temp_project(
name=f"{test_root_name}/Project2/Pipeline", system_tags=["pipeline"]
)
self._temp_task(
name="pipeline task",
type="controller",
system_tags=["pipeline"],
project=pipeline_project,
)
self._temp_task(name="regular task", project=pipeline_project)
report_project = self._temp_project(name=f"{test_root_name}/Project3")
self._temp_report(name="test report", project=report_project)
self._temp_task(name="regular task", project=report_project)
projects = self.api.projects.get_all_ex(
parent=[test_root], shallow_search=True, include_stats=True
).projects
self.assertEqual(
{p.basename for p in projects}, {f"Project{idx+1}" for idx in range(3)}
)
for p in projects:
self.assertEqual(
p.stats.active.total_tasks,
2
if p.basename in ("Project1", "Project2")
else 1
)
for i, type_ in enumerate(("dataset", "pipeline", "report")):
projects = self.api.projects.get_all_ex(
parent=[test_root],
children_type=type_,
shallow_search=True,
include_stats=True,
check_own_contents=True,
).projects
self.assertEqual({p.basename for p in projects}, {f"Project{i+1}"})
p = projects[0]
if type_ in ("dataset",):
self.assertEqual(p.own_datasets, 1)
self.assertIsNone(p.get("own_tasks"))
self.assertEqual(p.stats.datasets.count, 1)
self.assertEqual(p.stats.datasets.tags, dataset_tags)
else:
self.assertEqual(p.own_tasks, 0)
self.assertIsNone(p.get("own_datasets"))
self.assertEqual(
p.stats.active.total_tasks, 1 if p.basename != "Project4" else 0
)
def test_project_aggregations(self):
"""This test requires user with user_auth_only... credentials in db"""
user2_client = APIClient(
@@ -298,12 +364,21 @@ class TestSubProjects(TestService):
**kwargs,
)
def _temp_task(self, client=None, **kwargs):
def _temp_report(self, name, **kwargs):
return self.create_temp(
"reports",
name=name,
object_name="task",
delete_params=self.delete_params,
**kwargs,
)
def _temp_task(self, client=None, name=None, type=None, **kwargs):
return self.create_temp(
"tasks",
delete_params=self.delete_params,
type="testing",
name=db_id(),
type=type or "testing",
name=name or db_id(),
input=dict(view=dict()),
client=client,
**kwargs,

View File

@@ -19,7 +19,9 @@ class TestTaskEvents(TestService):
task_input = dict(
name=name, type="training", input=dict(mapping={}, view=dict(entries=[])),
)
return self.create_temp("tasks", delete_paramse=self.delete_params, **task_input)
return self.create_temp(
"tasks", delete_paramse=self.delete_params, **task_input
)
def _temp_model(self, name="test model events", **kwargs):
self.update_missing(kwargs, name=name, uri="file:///a/b", labels={})
@@ -104,9 +106,7 @@ class TestTaskEvents(TestService):
res.variants[variant]["iter"],
[x or special_iteration for x in range(iter_count)],
)
self.assertEqual(
res.variants[variant]["y"], list(range(iter_count))
)
self.assertEqual(res.variants[variant]["y"], list(range(iter_count)))
# but not in the histogram
data = self.api.events.scalar_metrics_iter_histogram(task=task)
@@ -140,8 +140,7 @@ class TestTaskEvents(TestService):
task=task, batch_size=100, metric=metric_param, count_total=True
)
self.assertEqual(
res.variants[variant]["y"],
[y or new_value for y in range(iter_count)],
res.variants[variant]["y"], [y or new_value for y in range(iter_count)],
)
task_data = self.api.tasks.get_by_id(task=task).task
@@ -198,7 +197,6 @@ class TestTaskEvents(TestService):
with self.api.raises(errors.bad_request.EventsNotAdded):
self.send(log_event)
# send metric events and check that model data always have iteration 0 and only last data is saved
events = [
{
**self._create_task_event("training_stats_scalar", model, iteration),
@@ -212,13 +210,15 @@ class TestTaskEvents(TestService):
for variant_idx in range(5)
]
self.send_batch(events)
data = self.api.events.scalar_metrics_iter_histogram(task=model, model_events=True)
data = self.api.events.scalar_metrics_iter_histogram(
task=model, model_events=True
)
self.assertEqual(list(data), [f"Metric{idx}" for idx in range(5)])
metric_data = data.Metric0
self.assertEqual(list(metric_data), [f"Variant{idx}" for idx in range(5)])
variant_data = metric_data.Variant0
self.assertEqual(variant_data.x, [0])
self.assertEqual(variant_data.y, [1.0])
self.assertEqual(variant_data.x, [0, 1])
self.assertEqual(variant_data.y, [0.0, 1.0])
def test_error_events(self):
task = self._temp_task()

View File

@@ -50,7 +50,9 @@ class TestTasksResetDelete(TestService):
self.assertEqual(res.urls.artifact_urls, [])
task = self.new_task()
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task)
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(
task
)
artifact_urls = self.send_artifacts(task)
event_urls = self.send_debug_image_events(task)
event_urls.update(self.send_plot_events(task))
@@ -74,7 +76,12 @@ class TestTasksResetDelete(TestService):
self.api.tasks.reset(task=task, force=True)
# test urls
task, (published_model_urls, draft_model_urls), artifact_urls, event_urls = self.create_task_with_data()
(
task,
(published_model_urls, draft_model_urls),
artifact_urls,
event_urls,
) = self.create_task_with_data()
res = self.api.tasks.reset(task=task, force=True, return_file_urls=True)
self.assertEqual(set(res.urls.model_urls), draft_model_urls)
self.assertEqual(set(res.urls.event_urls), event_urls)
@@ -101,13 +108,18 @@ class TestTasksResetDelete(TestService):
# with delete_contents flag
project = self.new_project()
task, (published_model_urls, draft_model_urls), artifact_urls, event_urls = self.create_task_with_data(
project=project
)
(
task,
(published_model_urls, draft_model_urls),
artifact_urls,
event_urls,
) = self.create_task_with_data(project=project)
res = self.api.projects.delete(
project=project, force=True, delete_contents=True
)
self.assertEqual(set(res.urls.model_urls), published_model_urls | draft_model_urls)
self.assertEqual(
set(res.urls.model_urls), published_model_urls | draft_model_urls
)
self.assertEqual(res.deleted, 1)
self.assertEqual(res.disassociated_tasks, 0)
self.assertEqual(res.deleted_tasks, 1)
@@ -121,7 +133,9 @@ class TestTasksResetDelete(TestService):
self, **kwargs
) -> Tuple[str, Tuple[Set[str], Set[str]], Set[str], Set[str]]:
task = self.new_task(**kwargs)
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(task, **kwargs)
(_, published_model_urls), (model, draft_model_urls) = self.create_task_models(
task, **kwargs
)
artifact_urls = self.send_artifacts(task)
event_urls = self.send_debug_image_events(task)
event_urls.update(self.send_plot_events(task))
@@ -172,7 +186,7 @@ class TestTasksResetDelete(TestService):
),
self.create_event(
model, "plot", 0, plot_str=f'{{"source": "{url2}"}}', model_event=True
)
),
]
self.send_batch(events)
return {url1, url2}
@@ -181,7 +195,10 @@ class TestTasksResetDelete(TestService):
url_pattern = "url_{num}.txt"
events = [
self.create_event(
task, "training_debug_image", iteration, url=url_pattern.format(num=iteration)
task,
"training_debug_image",
iteration,
url=url_pattern.format(num=iteration),
)
for iteration in range(5)
]

View File

@@ -103,7 +103,7 @@ class TestTasksEdit(TestService):
new_name = "new test"
new_tags = ["by"]
execution_overrides = dict(framework="Caffe")
execution_overrides = dict(framework="Caffe", model_labels={"test": 1.0})
new_task_id = self._clone_task(
task=task,
new_task_name=new_name,
@@ -120,6 +120,7 @@ class TestTasksEdit(TestService):
self.assertEqual(new_task.parent, task)
# self.assertEqual(new_task.execution.parameters, execution["parameters"])
self.assertEqual(new_task.execution.framework, execution_overrides["framework"])
self.assertEqual(new_task.execution.model_labels, {"test": 1})
self.assertEqual(new_task.system_tags, ["test"])
def test_model_check_in_clone(self):

View File

@@ -1 +1 @@
__version__ = "1.9.1"
__version__ = "1.10.0"

View File

@@ -1,4 +1,4 @@
FROM centos/nodejs-12-centos7 AS webapp
FROM node:18-bullseye as webapp_builder
ARG CLEARML_WEB_GIT_URL=https://github.com/allegroai/clearml-web.git
@@ -23,7 +23,8 @@ COPY docker/build/internal_files/clearml.conf.template /tmp/internal_files/
COPY docker/build/internal_files/clearml_subpath.conf.template /tmp/internal_files/
RUN /bin/bash -c '/tmp/internal_files/final_image_preparation.sh'
COPY --from=webapp /opt/open-webapp/build /usr/share/nginx/html
COPY --from=webapp_builder /opt/open-webapp/build /usr/share/nginx/html
COPY --from=webapp_builder /opt/open-webapp/dist/report-widgets /usr/share/nginx/widgets
EXPOSE 8080
EXPOSE 8008

View File

@@ -3,7 +3,8 @@ set -x
set -e
cd /opt/open-webapp/
npm ci --unsafe-perm node-sass
npm ci --legacy-peer-deps
cd /opt/open-webapp/
npm run build
npm run build-widgets

View File

@@ -37,13 +37,13 @@ http {
server {
listen 80 default_server;
listen [::]:80 default_server;
${COMMENT_IPV6_LISTEN}listen [::]:80 default_server;
server_name _;
root /usr/share/nginx/html;
proxy_http_version 1.1;
client_max_body_size 0;
# comppression
# compression
gzip on;
gzip_comp_level 9;
gzip_http_version 1.0;

View File

@@ -48,7 +48,8 @@ EOF
export NGINX_APISERVER_ADDR=${NGINX_APISERVER_ADDRESS:-http://apiserver:8008}
export NGINX_FILESERVER_ADDR=${NGINX_FILESERVER_ADDRESS:-http://fileserver:8081}
envsubst '${NGINX_APISERVER_ADDR} ${NGINX_FILESERVER_ADDR}' < /etc/nginx/clearml.conf.template > /etc/nginx/nginx.conf
COMMENT_IPV6_LISTEN=$([ "$DISABLE_NGINX_IPV6" = "true" ] && echo "#" || echo "") \
envsubst '${COMMENT_IPV6_LISTEN} ${NGINX_APISERVER_ADDR} ${NGINX_FILESERVER_ADDR}' < /etc/nginx/clearml.conf.template > /etc/nginx/nginx.conf
if [[ -n "${CLEARML_SERVER_SUB_PATH}" ]]; then
envsubst '${CLEARML_SERVER_SUB_PATH}' < /etc/nginx/clearml_subpath.conf.template > /etc/nginx/default.d/clearml_subpath.conf

View File

@@ -7,6 +7,7 @@ yum update -y
yum install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
yum install -y python36 python36-pip nginx gcc gcc-c++ python3-devel gettext
yum -y upgrade
python3 -m pip install -U pip
python3 -m pip install -r /opt/clearml/fileserver/requirements.txt
python3 -m pip install -r /opt/clearml/apiserver/requirements.txt
mkdir -p /opt/clearml/log
@@ -16,4 +17,5 @@ ln -s /dev/stderr /var/log/nginx/error.log
mv /etc/nginx/nginx.conf /etc/nginx/nginx.conf.orig
mv /tmp/internal_files/clearml.conf.template /etc/nginx/clearml.conf.template
mv /tmp/internal_files/clearml_subpath.conf.template /etc/nginx/clearml_subpath.conf.template
yum clean all
pip cache purge
yum clean all

View File

@@ -63,7 +63,7 @@ services:
nofile:
soft: 65536
hard: 65536
image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2
image: docker.elastic.co/elasticsearch/elasticsearch:7.17.7
restart: unless-stopped
volumes:
- c:/opt/clearml/data/elastic_7:/usr/share/elasticsearch/data

View File

@@ -63,7 +63,7 @@ services:
nofile:
soft: 65536
hard: 65536
image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2
image: docker.elastic.co/elasticsearch/elasticsearch:7.17.7
restart: unless-stopped
volumes:
- /opt/clearml/data/elastic_7:/usr/share/elasticsearch/data
@@ -172,7 +172,7 @@ services:
CLEARML_API_SECRET_KEY: ${CLEARML_API_SECRET_KEY:-}
CLEARML_AGENT_GIT_USER: ${CLEARML_AGENT_GIT_USER}
CLEARML_AGENT_GIT_PASS: ${CLEARML_AGENT_GIT_PASS}
CLEARML_AGENT_UPDATE_VERSION: ${CLEARML_AGENT_UPDATE_VERSION:-">=0.17.0"}
CLEARML_AGENT_UPDATE_VERSION: ${CLEARML_AGENT_UPDATE_VERSION:->=0.17.0}
CLEARML_AGENT_DEFAULT_BASE_DOCKER: "ubuntu:18.04"
AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID:-}
AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY:-}