mirror of
https://github.com/clearml/clearml-server
synced 2025-06-26 23:15:47 +00:00
Improve resource monitoring
This commit is contained in:
parent
cc93c15f8a
commit
9debe1adcd
@ -12,3 +12,4 @@ class ReportStatsOptionResponse(Base):
|
|||||||
enabled_time = DateTimeField(nullable=True)
|
enabled_time = DateTimeField(nullable=True)
|
||||||
enabled_version = StringField(nullable=True)
|
enabled_version = StringField(nullable=True)
|
||||||
enabled_user = StringField(nullable=True)
|
enabled_user = StringField(nullable=True)
|
||||||
|
current_version = StringField()
|
||||||
|
@ -59,6 +59,8 @@ class ResourceMonitor(Thread):
|
|||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
while True:
|
while True:
|
||||||
|
sleep(self.sample_interval_sec)
|
||||||
|
|
||||||
sample = self._get_sample()
|
sample = self._get_sample()
|
||||||
|
|
||||||
with self._lock:
|
with self._lock:
|
||||||
@ -67,21 +69,20 @@ class ResourceMonitor(Thread):
|
|||||||
self._avg = self._avg.avg(sample, self._count)
|
self._avg = self._avg.avg(sample, self._count)
|
||||||
self._count += 1
|
self._count += 1
|
||||||
|
|
||||||
sleep(self.sample_interval_sec)
|
|
||||||
|
|
||||||
def get_stats(self) -> dict:
|
def get_stats(self) -> dict:
|
||||||
""" Returns current resource statistics and clears internal resource statistics """
|
""" Returns current resource statistics and clears internal resource statistics """
|
||||||
with self._lock:
|
with self._lock:
|
||||||
min_ = attr.asdict(self._min)
|
min_ = attr.asdict(self._min)
|
||||||
max_ = attr.asdict(self._max)
|
max_ = attr.asdict(self._max)
|
||||||
avg = attr.asdict(self._avg)
|
avg = attr.asdict(self._avg)
|
||||||
res = {
|
interval = datetime.utcnow() - self._clear_time
|
||||||
"interval_sec": (datetime.utcnow() - self._clear_time).total_seconds(),
|
|
||||||
"num_cores": psutil.cpu_count(),
|
|
||||||
**{
|
|
||||||
k: {"min": v, "max": max_[k], "avg": avg[k]}
|
|
||||||
for k, v in min_.items()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
self._clear()
|
self._clear()
|
||||||
return res
|
|
||||||
|
return {
|
||||||
|
"interval_sec": interval.total_seconds(),
|
||||||
|
"num_cores": psutil.cpu_count(),
|
||||||
|
**{
|
||||||
|
k: {"min": v, "max": max_[k], "avg": avg[k]}
|
||||||
|
for k, v in min_.items()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -40,10 +40,6 @@ class Settings(DbModelMixin, Document):
|
|||||||
""" Sets a new value or adds a new key/value setting (if key does not exist) """
|
""" Sets a new value or adds a new key/value setting (if key does not exist) """
|
||||||
key = key.strip(sep)
|
key = key.strip(sep)
|
||||||
res = Settings.objects(key=key).update(key=key, value=value, upsert=True)
|
res = Settings.objects(key=key).update(key=key, value=value, upsert=True)
|
||||||
# if Settings.objects(key=key).only("key"):
|
|
||||||
#
|
|
||||||
# else:
|
|
||||||
# res = Settings(key=key, value=value).save()
|
|
||||||
return bool(res)
|
return bool(res)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
|
@ -86,6 +86,7 @@ endpoints {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
report_stats_option {
|
report_stats_option {
|
||||||
|
allow_roles = [ "*" ]
|
||||||
"2.4" {
|
"2.4" {
|
||||||
description: "Get or set the report statistics option per-company"
|
description: "Get or set the report statistics option per-company"
|
||||||
request {
|
request {
|
||||||
@ -117,6 +118,10 @@ report_stats_option {
|
|||||||
description: "If enabled, returns Id of the user who enabled the option"
|
description: "If enabled, returns Id of the user who enabled the option"
|
||||||
type: string
|
type: string
|
||||||
}
|
}
|
||||||
|
current_version {
|
||||||
|
description: "Returns the current server version"
|
||||||
|
type: string
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -87,7 +87,8 @@ def report_stats(call: APICall, company: str, request: ReportStatsOptionRequest)
|
|||||||
raise errors.server_error.InternalError(
|
raise errors.server_error.InternalError(
|
||||||
f"Failed setting report_stats to {enabled}"
|
f"Failed setting report_stats to {enabled}"
|
||||||
)
|
)
|
||||||
|
data = stats_option.to_mongo()
|
||||||
result = ReportStatsOptionResponse(**stats_option.to_mongo())
|
data["current_version"] = current_version
|
||||||
|
result = ReportStatsOptionResponse(**data)
|
||||||
|
|
||||||
call.result.data_model = result
|
call.result.data_model = result
|
||||||
|
@ -12,7 +12,7 @@ class ThreadsManager:
|
|||||||
self.objects = {}
|
self.objects = {}
|
||||||
self.lock = Lock()
|
self.lock = Lock()
|
||||||
|
|
||||||
for name, thread in threads.items():
|
for thread_name, thread in threads.items():
|
||||||
if issubclass(thread, Thread):
|
if issubclass(thread, Thread):
|
||||||
thread = thread()
|
thread = thread()
|
||||||
thread.start()
|
thread.start()
|
||||||
@ -20,9 +20,9 @@ class ThreadsManager:
|
|||||||
if not thread.is_alive():
|
if not thread.is_alive():
|
||||||
thread.start()
|
thread.start()
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Expected thread or thread class ({name}): {thread}")
|
raise Exception(f"Expected thread or thread class ({thread_name}): {thread}")
|
||||||
|
|
||||||
self.objects[name] = thread
|
self.objects[thread_name] = thread
|
||||||
|
|
||||||
def register(self, thread_name, daemon=True):
|
def register(self, thread_name, daemon=True):
|
||||||
def decorator(f):
|
def decorator(f):
|
||||||
|
Loading…
Reference in New Issue
Block a user