Initial commit for OOM issues: allow models to be unloaded and restart serving on crash

This commit is contained in:
Eugen Ajechiloae
2024-08-14 17:01:29 +03:00
parent 724c99c605
commit 6ef1f67ad0
8 changed files with 55 additions and 53 deletions

View File

@@ -1,7 +1,6 @@
import json
import os
import gc
import torch
from collections import deque
from pathlib import Path
from random import random
@@ -918,11 +917,13 @@ class ModelRequestProcessor(object):
if k not in self._endpoints:
# atomic
self._engine_processor_lookup[k]._model = None
self._engine_processor_lookup[k]._preprocess = None
del self._engine_processor_lookup[k]
self._engine_processor_lookup.pop(k, None)
gc.collect()
torch.cuda.empty_cache()
if hasattr(self._engine_processor_lookup[k]._preprocess, "unload"):
try:
self._engine_processor_lookup[k]._preprocess.unload()
except Exception as ex:
print("Exception occurred unloading model: {}".format(ex))
self._engine_processor_lookup.pop(k, None)
cleanup = False
model_monitor_update = False
except Exception as ex: