clearml-serving/examples/huggingface/preprocess.py

"""Hugginface preprocessing module for ClearML Serving."""
from typing import Any
from transformers import AutoTokenizer, PreTrainedTokenizer, TensorType


# Notice Preprocess class Must be named "Preprocess"
class Preprocess:
    """Tokenize incoming requests and format model outputs.

    This processing class is run by the ClearML inference services before and
    after each request.
    """

    # Name of the pretrained checkpoint whose tokenizer is loaded in __init__.
    TOKENIZER_NAME = "philschmid/MiniLM-L6-H384-uncased-sst2"
    # Token sequences longer than this are truncated by the tokenizer.
    MAX_LENGTH = 16

    def __init__(self):
        """Set internal state; this is called only once (i.e. not per request)."""
        self.tokenizer: PreTrainedTokenizer = AutoTokenizer.from_pretrained(self.TOKENIZER_NAME)

    def preprocess(self, body: dict, state: dict, collect_custom_statistics_fn=None) -> Any:
        """Tokenize the request text when a request comes into the inference service.

        Args:
            body: Request payload; its ``'text'`` entry is passed to the tokenizer.
            state: Per-request state dict (not used here).
            collect_custom_statistics_fn: Optional statistics callback (not used here).

        Returns:
            A list of three nested lists, in this order: ``input_ids``,
            ``token_type_ids`` and ``attention_mask``.

        Raises:
            KeyError: If ``body`` has no ``'text'`` entry.
        """
        tokens = self.tokenizer(
            text=body['text'],
            max_length=self.MAX_LENGTH,
            truncation=True,
            # Pad to the longest item so that a batch of texts with different
            # lengths can still be packed into rectangular NumPy arrays; for a
            # single text this adds no padding.
            padding=True,
            return_tensors=TensorType.NUMPY,
        )

        # The order below is the order in which the inputs are handed on.
        input_names = ("input_ids", "token_type_ids", "attention_mask")
        return [tokens[name].tolist() for name in input_names]

    def postprocess(self, data: Any, state: dict, collect_custom_statistics_fn=None) -> dict:
        """Wrap the model output when a request comes back from the Triton Engine.

        Args:
            data: Value returned by the model inference engine; it must provide
                a ``tolist()`` method (e.g. a NumPy array).
            state: Per-request state dict (not used here).
            collect_custom_statistics_fn: Optional statistics callback (not used here).

        Returns:
            A dict holding the output converted to plain lists under ``'data'``.
        """
        # Convert to plain Python lists and return them under the 'data' key.
        return {'data': data.tolist()}