Initial commit for OOM (out-of-memory) issues: allow models to be unloaded and restart serving on crash

2025-06-26 18:16:00 +00:00 · 2024-08-14 17:01:29 +03:00
parent 724c99c605
commit 6ef1f67ad0
8 changed files with 55 additions and 53 deletions
--- a/clearml_serving/serving/model_request_processor.py
+++ b/clearml_serving/serving/model_request_processor.py
@@ -1,7 +1,6 @@
 import json
 import os
 import gc
-import torch
 from collections import deque
 from pathlib import Path
 from random import random
@@ -918,11 +917,13 @@ class ModelRequestProcessor(object):
                        if k not in self._endpoints:
                            # atomic
                            self._engine_processor_lookup[k]._model = None
-                            self._engine_processor_lookup[k]._preprocess = None
-                            del self._engine_processor_lookup[k]
-                            self._engine_processor_lookup.pop(k, None)
                            gc.collect()
-                            torch.cuda.empty_cache()
+                            if hasattr(self._engine_processor_lookup[k]._preprocess, "unload"):
+                                try:
+                                    self._engine_processor_lookup[k]._preprocess.unload()
+                                except Exception as ex:
+                                    print("Exception occurred unloading model: {}".format(ex))
+                            self._engine_processor_lookup.pop(k, None)
                cleanup = False
                model_monitor_update = False
            except Exception as ex: