Add storage service support

This commit is contained in:
clearml 2024-12-05 22:21:12 +02:00
parent eb755be001
commit 3bcbc38c4c
7 changed files with 688 additions and 53 deletions

View File

@ -0,0 +1,60 @@
from jsonmodels.fields import StringField, BoolField, ListField, EmbeddedField
from jsonmodels.models import Base
from jsonmodels.validators import Enum
class AWSBucketSettings(Base):
    """Per-bucket S3 credentials/connection settings"""
    bucket = StringField()  # the name of the bucket
    subdir = StringField()  # the path to match inside the bucket
    host = StringField()  # host address (for minio servers)
    key = StringField()  # access key
    secret = StringField()  # secret key
    token = StringField()  # access token
    multipart = BoolField(default=True)  # use multipart upload
    acl = StringField()  # ACL
    secure = BoolField(default=True)  # use SSL connection
    region = StringField()  # AWS region
    verify = BoolField(default=True)  # verify server certificate
    use_credentials_chain = BoolField(default=False)  # use host configured credentials
class AWSSettings(Base):
    """Company-level AWS S3 storage settings: default credentials plus per-bucket overrides"""
    key = StringField()  # default access key
    secret = StringField()  # default secret key
    region = StringField()  # default AWS region
    token = StringField()  # default access token
    use_credentials_chain = BoolField(default=False)  # if set then use host credentials
    buckets = ListField(items_types=[AWSBucketSettings])  # credential settings per bucket
class GoogleBucketSettings(Base):
    """Per-bucket Google storage settings"""
    bucket = StringField()  # the name of the bucket
    subdir = StringField()  # the path to match inside the bucket
    project = StringField()  # the name of the project
    credentials_json = StringField()  # the contents of the credentials json file
class GoogleSettings(Base):
    """Company-level Google storage settings: defaults plus per-bucket overrides"""
    project = StringField()  # default project name
    credentials_json = StringField()  # default contents of the credentials json file
    buckets = ListField(items_types=[GoogleBucketSettings])  # credentials per bucket
class AzureContainerSettings(Base):
    """Azure container credentials settings"""
    account_name = StringField()  # account name
    account_key = StringField()  # account key
    container_name = StringField()  # the name of the container
class AzureSettings(Base):
    """Company-level Azure storage settings: credentials per container"""
    containers = ListField(items_types=[AzureContainerSettings])
class SetSettingsRequest(Base):
    """Request model for storage.set_settings; each section is optional and updated independently"""
    aws = EmbeddedField(AWSSettings)
    google = EmbeddedField(GoogleSettings)
    azure = EmbeddedField(AzureSettings)
class ResetSettingsRequest(Base):
    """Request model for storage.reset_settings"""
    # names of the settings sections to delete; restricted to the supported providers
    keys = ListField([str], item_validators=[Enum("aws", "google", "azure")])

View File

@ -6,7 +6,6 @@ from redis import Redis
from apiserver.config_repo import config from apiserver.config_repo import config
from apiserver.bll.project import project_ids_with_children from apiserver.bll.project import project_ids_with_children
from apiserver.database.model import EntityVisibility
from apiserver.database.model.base import GetMixin from apiserver.database.model.base import GetMixin
from apiserver.database.model.model import Model from apiserver.database.model.model import Model
from apiserver.database.model.task.task import Task from apiserver.database.model.task.task import Task

View File

@ -1,14 +1,32 @@
import json
import os
import tempfile
from copy import copy from copy import copy
from datetime import datetime
from typing import Optional, Sequence
import attr
from boltons.cacheutils import cachedproperty from boltons.cacheutils import cachedproperty
from clearml.backend_config.bucket_config import ( from clearml.backend_config.bucket_config import (
S3BucketConfigurations, S3BucketConfigurations,
AzureContainerConfigurations, AzureContainerConfigurations,
GSBucketConfigurations, GSBucketConfigurations,
AzureContainerConfig,
GSBucketConfig,
S3BucketConfig,
) )
from apiserver.apierrors import errors
from apiserver.apimodels.storage import SetSettingsRequest
from apiserver.config_repo import config from apiserver.config_repo import config
from apiserver.database.model.storage_settings import (
StorageSettings,
GoogleBucketSettings,
AWSSettings,
AzureStorageSettings,
GoogleStorageSettings,
)
from apiserver.database.utils import id as db_id
log = config.logger(__file__) log = config.logger(__file__)
@ -32,17 +50,224 @@ class StorageBLL:
def get_azure_settings_for_company( def get_azure_settings_for_company(
self, self,
company_id: str, company_id: str,
db_settings: StorageSettings = None,
query_db: bool = True,
) -> AzureContainerConfigurations: ) -> AzureContainerConfigurations:
return copy(self._default_azure_configs) if not db_settings and query_db:
db_settings = (
StorageSettings.objects(company=company_id).only("azure").first()
)
if not db_settings or not db_settings.azure:
return copy(self._default_azure_configs)
azure = db_settings.azure
return AzureContainerConfigurations(
container_configs=[
AzureContainerConfig(**entry.to_proper_dict())
for entry in (azure.containers or [])
]
)
def get_gs_settings_for_company( def get_gs_settings_for_company(
self, self,
company_id: str, company_id: str,
db_settings: StorageSettings = None,
query_db: bool = True,
json_string: bool = False,
) -> GSBucketConfigurations: ) -> GSBucketConfigurations:
return copy(self._default_gs_configs) if not db_settings and query_db:
db_settings = (
StorageSettings.objects(company=company_id).only("google").first()
)
if not db_settings or not db_settings.google:
if not json_string:
return copy(self._default_gs_configs)
if self._default_gs_configs._buckets:
buckets = [
attr.evolve(
b,
credentials_json=self._assure_json_string(b.credentials_json),
)
for b in self._default_gs_configs._buckets
]
else:
buckets = self._default_gs_configs._buckets
return GSBucketConfigurations(
buckets=buckets,
default_project=self._default_gs_configs._default_project,
default_credentials=self._assure_json_string(
self._default_gs_configs._default_credentials
),
)
def get_bucket_config(bc: GoogleBucketSettings) -> GSBucketConfig:
data = bc.to_proper_dict()
if not json_string and bc.credentials_json:
data["credentials_json"] = self._assure_json_file(bc.credentials_json)
return GSBucketConfig(**data)
google = db_settings.google
buckets_configs = [get_bucket_config(b) for b in (google.buckets or [])]
return GSBucketConfigurations(
buckets=buckets_configs,
default_project=google.project,
default_credentials=google.credentials_json
if json_string
else self._assure_json_file(google.credentials_json),
)
def get_aws_settings_for_company( def get_aws_settings_for_company(
self, self,
company_id: str, company_id: str,
db_settings: StorageSettings = None,
query_db: bool = True,
) -> S3BucketConfigurations: ) -> S3BucketConfigurations:
return copy(self._default_aws_configs) if not db_settings and query_db:
db_settings = (
StorageSettings.objects(company=company_id).only("aws").first()
)
if not db_settings or not db_settings.aws:
return copy(self._default_aws_configs)
aws = db_settings.aws
buckets_configs = S3BucketConfig.from_list(
[b.to_proper_dict() for b in (aws.buckets or [])]
)
return S3BucketConfigurations(
buckets=buckets_configs,
default_key=aws.key,
default_secret=aws.secret,
default_region=aws.region,
default_use_credentials_chain=aws.use_credentials_chain,
default_token=aws.token,
default_extra_args={},
)
def _assure_json_file(self, name_or_content: str) -> str:
    """
    Return a file name for the passed google credentials.
    If name_or_content already looks like a file name (ends with .json
    or exists on disk) or is not a valid json document then it is
    returned unchanged. Otherwise the content is written into a
    temporary .json file and that file name is returned.
    """
    if not name_or_content:
        # empty value ("" or None) is passed through unchanged
        return name_or_content
    if name_or_content.endswith(".json") or os.path.exists(name_or_content):
        # already a file name
        return name_or_content
    try:
        json.loads(name_or_content)
    except ValueError:
        # not a json document (json.JSONDecodeError subclasses ValueError);
        # return the value as is
        return name_or_content
    # delete=False is intentional: the file name is returned to the caller.
    # NOTE(review): a new temp file is created on every call with the same
    # content and never cleaned up here — consider caching/cleanup
    with tempfile.NamedTemporaryFile(
        mode="wt", delete=False, suffix=".json"
    ) as tmp:
        tmp.write(name_or_content)
    return tmp.name
def _assure_json_string(self, name_or_content: str) -> Optional[str]:
    """
    Return the json content for the passed google credentials.
    If name_or_content is already a valid json document it is returned
    as is. Otherwise it is treated as a file name and the file contents
    are returned, or None if the file cannot be read.
    """
    if not name_or_content:
        # empty value ("" or None) is passed through unchanged
        return name_or_content
    try:
        json.loads(name_or_content)
        return name_or_content
    except ValueError:
        # not a json document; fall through and treat as a file name
        pass
    try:
        with open(name_or_content) as fp:
            return fp.read()
    except (OSError, ValueError):
        # missing/unreadable file or a value that is not a valid path
        return None
def get_company_settings(self, company_id: str) -> dict:
    """
    Return the effective storage settings for the company as a dict with
    "aws", "google" and "azure" sections plus the "last_update" time of
    the stored settings document (None if no document exists).
    The per-provider getters fall back to the default configurations when
    the company has no stored settings for that provider.
    """
    db_settings = StorageSettings.objects(company=company_id).first()
    # the db document is queried once here and passed down with
    # query_db=False so each getter does not re-query per provider
    aws = self.get_aws_settings_for_company(company_id, db_settings, query_db=False)
    # NOTE(review): the fields below are private attributes of the clearml
    # SDK configuration objects (S3BucketConfigurations etc.) — confirm
    # they remain stable across clearml SDK versions
    aws_dict = {
        "key": aws._default_key,
        "secret": aws._default_secret,
        "token": aws._default_token,
        "region": aws._default_region,
        "use_credentials_chain": aws._default_use_credentials_chain,
        "buckets": [attr.asdict(b) for b in aws._buckets],
    }
    # json_string=True so credentials are returned as json content,
    # not as a path to a credentials file
    gs = self.get_gs_settings_for_company(
        company_id, db_settings, query_db=False, json_string=True
    )
    gs_dict = {
        "project": gs._default_project,
        "credentials_json": gs._default_credentials,
        "buckets": [attr.asdict(b) for b in gs._buckets],
    }
    azure = self.get_azure_settings_for_company(company_id, db_settings)
    azure_dict = {
        "containers": [attr.asdict(ac) for ac in azure._container_configs],
    }
    return {
        "aws": aws_dict,
        "google": gs_dict,
        "azure": azure_dict,
        "last_update": db_settings.last_update if db_settings else None,
    }
def set_company_settings(
    self, company_id: str, settings: SetSettingsRequest
) -> int:
    """
    Store the passed storage settings for the company (upsert).
    Only the sections present in the request (aws/google/azure) are
    written; for google the credentials_json value is validated to be a
    parseable json document. Returns the db update result count.
    :raises errors.bad_request.ValidationError: if no settings sections
        were provided or google credentials_json is not valid json
    """
    def db_fields(api_model, db_cls) -> dict:
        # keep only the request fields that exist on the db model
        allowed = db_cls.get_fields()
        return {k: v for k, v in api_model.to_struct().items() if k in allowed}

    update_dict = {}
    if settings.aws:
        update_dict["aws"] = db_fields(settings.aws, AWSSettings)
    if settings.azure:
        update_dict["azure"] = db_fields(settings.azure, AzureStorageSettings)
    if settings.google:
        update_dict["google"] = db_fields(settings.google, GoogleStorageSettings)
        cred_json = update_dict["google"].get("credentials_json")
        if cred_json:
            try:
                json.loads(cred_json)
            except Exception as ex:
                raise errors.bad_request.ValidationError(
                    f"Invalid json credentials: {str(ex)}"
                )
    if not update_dict:
        raise errors.bad_request.ValidationError("No settings were provided")

    # use a differently-named local so the request parameter is not shadowed
    existing = StorageSettings.objects(company=company_id).only("id").first()
    settings_id = existing.id if existing else db_id()
    return StorageSettings.objects(id=settings_id).update(
        upsert=True,
        id=settings_id,
        company=company_id,
        last_update=datetime.utcnow(),
        **update_dict,
    )
def reset_company_settings(self, company_id: str, keys: Sequence[str]) -> int:
    """
    Remove the passed settings sections (expected: "aws"/"google"/"azure")
    from the company storage settings document and refresh its last_update
    time. Returns the db update result count.
    """
    return StorageSettings.objects(company=company_id).update(
        last_update=datetime.utcnow(), **{f"unset__{k}": 1 for k in keys}
    )

View File

@ -0,0 +1,76 @@
from mongoengine import (
Document,
EmbeddedDocument,
StringField,
DateTimeField,
EmbeddedDocumentListField,
EmbeddedDocumentField,
BooleanField,
)
from apiserver.database import Database, strict
from apiserver.database.model import DbModelMixin
from apiserver.database.model.base import ProperDictMixin
class AWSBucketSettings(EmbeddedDocument, ProperDictMixin):
    """Per-bucket S3 credentials/connection settings (embedded in AWSSettings)"""
    bucket = StringField()  # the name of the bucket
    subdir = StringField()  # the path to match inside the bucket
    host = StringField()  # host address (for minio servers)
    key = StringField()  # access key
    secret = StringField()  # secret key
    token = StringField()  # access token
    multipart = BooleanField()  # use multipart upload
    acl = StringField()  # ACL
    secure = BooleanField()  # use SSL connection
    region = StringField()  # AWS region
    verify = BooleanField()  # verify server certificate
    use_credentials_chain = BooleanField()  # use host configured credentials
class AWSSettings(EmbeddedDocument, DbModelMixin):
    """Company-level AWS S3 settings: default credentials plus per-bucket overrides"""
    key = StringField()  # default access key
    secret = StringField()  # default secret key
    region = StringField()  # default AWS region
    token = StringField()  # default access token
    use_credentials_chain = BooleanField()  # if set then use host credentials
    buckets = EmbeddedDocumentListField(AWSBucketSettings)  # settings per bucket
class GoogleBucketSettings(EmbeddedDocument, ProperDictMixin):
    """Per-bucket Google storage settings (embedded in GoogleStorageSettings)"""
    bucket = StringField()  # the name of the bucket
    subdir = StringField()  # the path to match inside the bucket
    project = StringField()  # the name of the project
    credentials_json = StringField()  # the contents of the credentials json file
class GoogleStorageSettings(EmbeddedDocument, DbModelMixin):
    """Company-level Google storage settings: defaults plus per-bucket overrides"""
    project = StringField()  # default project name
    credentials_json = StringField()  # default contents of the credentials json file
    buckets = EmbeddedDocumentListField(GoogleBucketSettings)  # settings per bucket
class AzureStorageContainerSettings(EmbeddedDocument, ProperDictMixin):
    """Azure container credentials settings (embedded in AzureStorageSettings)"""
    account_name = StringField(required=True)  # account name
    account_key = StringField(required=True)  # account key
    container_name = StringField()  # the name of the container
class AzureStorageSettings(EmbeddedDocument, DbModelMixin):
    """Company-level Azure storage settings: credentials per container"""
    containers = EmbeddedDocumentListField(AzureStorageContainerSettings)
class StorageSettings(DbModelMixin, Document):
    """Per-company storage settings document (one document per company)"""
    meta = {
        "db_alias": Database.backend,
        "strict": strict,
        "indexes": [
            "company"
        ],
    }
    id = StringField(primary_key=True)
    # unique=True enforces at most one settings document per company
    company = StringField(required=True, unique=True)
    # last time any settings section was set or reset (UTC)
    last_update = DateTimeField()
    aws: AWSSettings = EmbeddedDocumentField(AWSSettings)
    google: GoogleStorageSettings = EmbeddedDocumentField(GoogleStorageSettings)
    azure: AzureStorageSettings = EmbeddedDocumentField(AzureStorageSettings)

View File

@ -0,0 +1,243 @@
_description: """This service provides storage settings management"""
_default {
internal: true
allow_roles: ["system", "root", "admin"]
}
_definitions {
include "_common.conf"
aws_bucket {
type: object
description: Settings per S3 bucket
properties {
bucket {
description: The name of the bucket
type: string
}
subdir {
description: The path to match
type: string
}
host {
description: Host address (for minio servers)
type: string
}
key {
description: Access key
type: string
}
secret {
description: Secret key
type: string
}
token {
description: Access token
type: string
}
multipart {
description: Multipart upload
type: boolean
default: true
}
acl {
description: ACL
type: string
}
secure {
description: Use SSL connection
type: boolean
default: true
}
region {
description: AWS Region
type: string
}
verify {
description: Verify server certificate
type: boolean
default: true
}
use_credentials_chain {
description: Use host configured credentials
type: boolean
default: false
}
}
}
aws {
type: object
description: AWS S3 storage settings
properties {
key {
description: Access key
type: string
}
secret {
description: Secret key
type: string
}
region {
description: AWS region
type: string
}
token {
description: Access token
type: string
}
use_credentials_chain {
description: If set then use host credentials
type: boolean
default: false
}
buckets {
description: Credential settings per bucket
type: array
items {"$ref": "#/definitions/aws_bucket"}
}
}
}
google_bucket {
type: object
description: Settings per Google storage bucket
properties {
bucket {
description: The name of the bucket
type: string
}
project {
description: The name of the project
type: string
}
subdir {
description: The path to match
type: string
}
credentials_json {
description: The contents of the credentials json file
type: string
}
}
}
google {
type: object
description: Google storage settings
properties {
project {
description: Project name
type: string
}
credentials_json {
description: The contents of the credentials json file
type: string
}
buckets {
description: Credentials per bucket
type: array
items {"$ref": "#/definitions/google_bucket"}
}
}
}
azure_container {
type: object
description: Azure container settings
properties {
account_name {
description: Account name
type: string
}
account_key {
description: Account key
type: string
}
container_name {
description: The name of the container
type: string
}
}
}
azure {
type: object
description: Azure storage settings
properties {
containers {
description: Credentials per container
type: array
items {"$ref": "#/definitions/azure_container"}
}
}
}
}
set_settings {
"2.31" {
description: Set Storage settings
request {
type: object
properties {
aws {"$ref": "#/definitions/aws"}
google {"$ref": "#/definitions/google"}
azure {"$ref": "#/definitions/azure"}
}
}
response {
type: object
properties {
updated {
description: "Number of settings documents updated (0 or 1)"
type: integer
enum: [0, 1]
}
}
}
}
}
reset_settings {
"2.31" {
description: Reset selected storage settings
request {
type: object
properties {
keys {
description: The names of the settings to delete
type: array
items {
type: string
enum: ["azure", "aws", "google"]
}
}
}
}
response {
type: object
properties {
updated {
description: "Number of settings documents updated (0 or 1)"
type: integer
enum: [0, 1]
}
}
}
}
}
get_settings {
"2.22" {
description: Get storage settings
request {
type: object
additionalProperties: false
}
response {
type: object
properties {
last_update {
description: "Settings last update time (UTC)"
type: string
format: "date-time"
}
aws {"$ref": "#/definitions/aws"}
google {"$ref": "#/definitions/google"}
azure {"$ref": "#/definitions/azure"}
}
}
}
}

View File

@ -0,0 +1,22 @@
from apiserver.apimodels.storage import ResetSettingsRequest, SetSettingsRequest
from apiserver.bll.storage import StorageBLL
from apiserver.service_repo import endpoint, APICall
# single shared BLL instance used by all storage endpoints
storage_bll = StorageBLL()


@endpoint("storage.get_settings")
def get_settings(call: APICall, company: str, _):
    """Return the effective storage settings for the calling company"""
    call.result.data = {"settings": storage_bll.get_company_settings(company)}
@endpoint("storage.set_settings")
def set_settings(call: APICall, company: str, request: SetSettingsRequest):
    """Store the passed storage settings sections for the calling company"""
    call.result.data = {"updated": storage_bll.set_company_settings(company, request)}
@endpoint("storage.reset_settings")
def reset_settings(call: APICall, company: str, request: ResetSettingsRequest):
    """Remove the requested storage settings sections for the calling company"""
    call.result.data = {
        "updated": storage_bll.reset_company_settings(company, request.keys)
    }

View File

@ -26,6 +26,7 @@ class TestServing(TestService):
for container_id in (container_id1, container_id2) for container_id in (container_id1, container_id2)
] ]
# registering instances
for container_info in container_infos: for container_info in container_infos:
self.api.serving.register_container( self.api.serving.register_container(
**container_info, **container_info,
@ -39,63 +40,72 @@ class TestServing(TestService):
requests_num=1000 * mul, requests_num=1000 * mul,
requests_min=5 * mul, # requests per minute requests_min=5 * mul, # requests per minute
latency_ms=100 * mul, # average latency latency_ms=100 * mul, # average latency
machine_stats={ # the same structure here as used by worker status_reports machine_stats={ # the same structure here as used by worker status_reports
"cpu_usage": [10, 20], "cpu_usage": [10, 20],
"memory_used": 50, "memory_used": 50,
} },
) )
endpoints = self.api.serving.get_endpoints().endpoints
details = self.api.serving.get_endpoint_details(endpoint_url=url)
details = self.api.serving.get_endpoint_details(endpoint_url=url)
# getting endpoints and endpoint details
endpoints = self.api.serving.get_endpoints().endpoints
self.assertTrue(any(e for e in endpoints if e.url == url))
details = self.api.serving.get_endpoint_details(endpoint_url=url)
self.assertEqual(details.url, url)
self.assertEqual(details.uptime_sec, 2000)
self.assertEqual(
{
inst.id: [
inst[field]
for field in (
"uptime_sec",
"requests",
"requests_min",
"latency_ms",
)
]
for inst in details.instances
},
{"container_1": [1000, 1000, 5, 100], "container_2": [2000, 2000, 10, 200]},
)
# make sure that the first call did not invalidate anything
new_details = self.api.serving.get_endpoint_details(endpoint_url=url)
self.assertEqual(details, new_details)
# charts
sleep(5) # give time to ES to accomodate data sleep(5) # give time to ES to accomodate data
to_date = int(time()) + 40 to_date = int(time()) + 40
from_date = to_date - 100 from_date = to_date - 100
res1 = self.api.serving.get_endpoint_metrics_history( for metric_type, title, value in (
endpoint_url=url, (None, "Number of Requests", 3000),
from_date=from_date, ("requests_min", "Requests per Minute", 15),
to_date=to_date, ("latency_ms", "Average Latency (ms)", 150),
interval=1, ("cpu_count", "CPU Count", 4),
) ("cpu_util", "Average CPU Load (%)", 15),
res2 = self.api.serving.get_endpoint_metrics_history( ("ram_total", "RAM Total (GB)", 100),
endpoint_url=url, ):
from_date=from_date, res = self.api.serving.get_endpoint_metrics_history(
to_date=to_date, endpoint_url=url,
interval=1, from_date=from_date,
metric_type="requests_min", to_date=to_date,
) interval=1,
res3 = self.api.serving.get_endpoint_metrics_history( **({"metric_type": metric_type} if metric_type else {}),
endpoint_url=url, )
from_date=from_date, self.assertEqual(res.computed_interval, 40)
to_date=to_date, self.assertEqual(res.total.title, title)
interval=1, length = len(res.total.dates)
metric_type="latency_ms", self.assertTrue(3>=length>=1)
) self.assertEqual(len(res.total["values"]), length)
res4 = self.api.serving.get_endpoint_metrics_history( self.assertIn(value, res.total["values"])
endpoint_url=url, self.assertEqual(set(res.instances), {container_id1, container_id2})
from_date=from_date, for inst in res.instances.values():
to_date=to_date, self.assertEqual(inst.dates, res.total.dates)
interval=1, self.assertEqual(len(inst["values"]), length)
metric_type="cpu_count",
)
res5 = self.api.serving.get_endpoint_metrics_history(
endpoint_url=url,
from_date=from_date,
to_date=to_date,
interval=1,
metric_type="cpu_util",
)
res6 = self.api.serving.get_endpoint_metrics_history(
endpoint_url=url,
from_date=from_date,
to_date=to_date,
interval=1,
metric_type="ram_total",
)
# unregistering containers
for container_id in (container_id1, container_id2): for container_id in (container_id1, container_id2):
self.api.serving.unregister_container(container_id=container_id) self.api.serving.unregister_container(container_id=container_id)
endpoints = self.api.serving.get_endpoints().endpoints endpoints = self.api.serving.get_endpoints().endpoints
self.assertFalse(any(e for e in endpoints if e.url == url))
with self.api.raises(errors.bad_request.NoContainersForUrl): with self.api.raises(errors.bad_request.NoContainersForUrl):
details = self.api.serving.get_endpoint_details(endpoint_url=url) self.api.serving.get_endpoint_details(endpoint_url=url)
pass