Add huggingface transformers example

2025-06-26 18:16:00 +00:00 · 2022-10-08 02:12:31 +03:00
parent 363276ba79
commit 219fa308df
4 changed files with 174 additions and 0 deletions
--- a/examples/huggingface/preprocess.py
+++ b/examples/huggingface/preprocess.py
@@ -0,0 +1,29 @@
+"""Hugging Face preprocessing module for ClearML Serving."""
+from typing import Any
+from transformers import AutoTokenizer, PreTrainedTokenizer, TensorType
+
+
+# Note: the preprocessing class must be named "Preprocess"
+class Preprocess:
+    """Request hooks that the ClearML inference service runs before and after every request."""
+
+    def __init__(self):
+        """Build the tokenizer a single time at construction, not once per request."""
+        checkpoint = "philschmid/MiniLM-L6-H384-uncased-sst2"
+        self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(checkpoint)
+
+    def preprocess(self, body: dict, state: dict, collect_custom_statistics_fn=None) -> Any:
+        """Tokenize ``body['text']`` for an incoming request and return the encodings as nested lists."""
+        # NumPy tensors are requested so that each encoded field offers `.tolist()`.
+        encoded = self.tokenizer(
+            text=body['text'], max_length=16, truncation=True, return_tensors=TensorType.NUMPY
+        )
+        # The output order is fixed: input_ids, token_type_ids, attention_mask.
+        field_names = ("input_ids", "token_type_ids", "attention_mask")
+        return [encoded[name].tolist() for name in field_names]
+
+    def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict:
+        """Wrap the value returned by the inference engine in the response dict under 'data'."""
+        # `data` is the engine's output; it is converted with `.tolist()` before being returned.
+        as_list = data.tolist()
+        return {'data': as_list}