mirror of
https://github.com/clearml/clearml-server
synced 2025-03-03 02:33:02 +00:00
Improve resource monitoring
This commit is contained in:
parent
cc93c15f8a
commit
9debe1adcd
@ -12,3 +12,4 @@ class ReportStatsOptionResponse(Base):
|
||||
enabled_time = DateTimeField(nullable=True)
|
||||
enabled_version = StringField(nullable=True)
|
||||
enabled_user = StringField(nullable=True)
|
||||
current_version = StringField()
|
||||
|
@ -59,6 +59,8 @@ class ResourceMonitor(Thread):
|
||||
|
||||
def run(self):
|
||||
while True:
|
||||
sleep(self.sample_interval_sec)
|
||||
|
||||
sample = self._get_sample()
|
||||
|
||||
with self._lock:
|
||||
@ -67,21 +69,20 @@ class ResourceMonitor(Thread):
|
||||
self._avg = self._avg.avg(sample, self._count)
|
||||
self._count += 1
|
||||
|
||||
sleep(self.sample_interval_sec)
|
||||
|
||||
def get_stats(self) -> dict:
|
||||
""" Returns current resource statistics and clears internal resource statistics """
|
||||
with self._lock:
|
||||
min_ = attr.asdict(self._min)
|
||||
max_ = attr.asdict(self._max)
|
||||
avg = attr.asdict(self._avg)
|
||||
res = {
|
||||
"interval_sec": (datetime.utcnow() - self._clear_time).total_seconds(),
|
||||
"num_cores": psutil.cpu_count(),
|
||||
**{
|
||||
k: {"min": v, "max": max_[k], "avg": avg[k]}
|
||||
for k, v in min_.items()
|
||||
}
|
||||
}
|
||||
interval = datetime.utcnow() - self._clear_time
|
||||
self._clear()
|
||||
return res
|
||||
|
||||
return {
|
||||
"interval_sec": interval.total_seconds(),
|
||||
"num_cores": psutil.cpu_count(),
|
||||
**{
|
||||
k: {"min": v, "max": max_[k], "avg": avg[k]}
|
||||
for k, v in min_.items()
|
||||
}
|
||||
}
|
||||
|
@ -40,10 +40,6 @@ class Settings(DbModelMixin, Document):
|
||||
""" Sets a new value or adds a new key/value setting (if key does not exist) """
|
||||
key = key.strip(sep)
|
||||
res = Settings.objects(key=key).update(key=key, value=value, upsert=True)
|
||||
# if Settings.objects(key=key).only("key"):
|
||||
#
|
||||
# else:
|
||||
# res = Settings(key=key, value=value).save()
|
||||
return bool(res)
|
||||
|
||||
@classmethod
|
||||
|
@ -86,6 +86,7 @@ endpoints {
|
||||
}
|
||||
}
|
||||
report_stats_option {
|
||||
allow_roles = [ "*" ]
|
||||
"2.4" {
|
||||
description: "Get or set the report statistics option per-company"
|
||||
request {
|
||||
@ -117,6 +118,10 @@ report_stats_option {
|
||||
description: "If enabled, returns Id of the user who enabled the option"
|
||||
type: string
|
||||
}
|
||||
current_version {
|
||||
description: "Returns the current server version"
|
||||
type: string
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -87,7 +87,8 @@ def report_stats(call: APICall, company: str, request: ReportStatsOptionRequest)
|
||||
raise errors.server_error.InternalError(
|
||||
f"Failed setting report_stats to {enabled}"
|
||||
)
|
||||
|
||||
result = ReportStatsOptionResponse(**stats_option.to_mongo())
|
||||
data = stats_option.to_mongo()
|
||||
data["current_version"] = current_version
|
||||
result = ReportStatsOptionResponse(**data)
|
||||
|
||||
call.result.data_model = result
|
||||
|
@ -12,7 +12,7 @@ class ThreadsManager:
|
||||
self.objects = {}
|
||||
self.lock = Lock()
|
||||
|
||||
for name, thread in threads.items():
|
||||
for thread_name, thread in threads.items():
|
||||
if issubclass(thread, Thread):
|
||||
thread = thread()
|
||||
thread.start()
|
||||
@ -20,9 +20,9 @@ class ThreadsManager:
|
||||
if not thread.is_alive():
|
||||
thread.start()
|
||||
else:
|
||||
raise Exception(f"Expected thread or thread class ({name}): {thread}")
|
||||
raise Exception(f"Expected thread or thread class ({thread_name}): {thread}")
|
||||
|
||||
self.objects[name] = thread
|
||||
self.objects[thread_name] = thread
|
||||
|
||||
def register(self, thread_name, daemon=True):
|
||||
def decorator(f):
|
||||
|
Loading…
Reference in New Issue
Block a user