diff --git a/clearml_serving/engines/triton/Dockerfile b/clearml_serving/engines/triton/Dockerfile index 5d7a3f0..dd03cc2 100644 --- a/clearml_serving/engines/triton/Dockerfile +++ b/clearml_serving/engines/triton/Dockerfile @@ -5,13 +5,13 @@ FROM nvcr.io/nvidia/tritonserver:22.04-py3 ENV LC_ALL=C.UTF-8 # install base package -RUN pip3 install clearml-serving +RUN pip3 install --no-cache-dir clearml-serving # get latest execution code from the git repository # RUN cd $HOME && git clone https://github.com/allegroai/clearml-serving.git COPY clearml_serving /root/clearml/clearml_serving -RUN pip3 install -r /root/clearml/clearml_serving/engines/triton/requirements.txt +RUN pip3 install --no-cache-dir -r /root/clearml/clearml_serving/engines/triton/requirements.txt # default serving port EXPOSE 8001 diff --git a/clearml_serving/engines/triton/Dockerfile.tr2207 b/clearml_serving/engines/triton/Dockerfile.tr2207 new file mode 100644 index 0000000..ab8dc7c --- /dev/null +++ b/clearml_serving/engines/triton/Dockerfile.tr2207 @@ -0,0 +1,23 @@ + +FROM nvcr.io/nvidia/tritonserver:22.07-py3 + + +ENV LC_ALL=C.UTF-8 + +# install base package +RUN pip3 install --no-cache-dir -U pip +RUN pip3 install --no-cache-dir clearml-serving + +# get latest execution code from the git repository +# RUN cd $HOME && git clone https://github.com/allegroai/clearml-serving.git +COPY clearml_serving /root/clearml/clearml_serving + +RUN pip3 install --no-cache-dir -r /root/clearml/clearml_serving/engines/triton/requirements.txt + +# default serving port +EXPOSE 8001 + +# environment variable to load Task from CLEARML_SERVING_TASK_ID, CLEARML_SERVING_PORT + +WORKDIR /root/clearml/ +ENTRYPOINT ["clearml_serving/engines/triton/entrypoint.sh"] diff --git a/clearml_serving/serving/Dockerfile b/clearml_serving/serving/Dockerfile index 7d6c8c7..198f2fc 100644 --- a/clearml_serving/serving/Dockerfile +++ b/clearml_serving/serving/Dockerfile @@ -4,13 +4,13 @@ FROM python:3.9-bullseye ENV LC_ALL=C.UTF-8 # 
install base package -RUN pip3 install clearml-serving +RUN pip3 install --no-cache-dir clearml-serving # get latest execution code from the git repository # RUN cd $HOME && git clone https://github.com/allegroai/clearml-serving.git COPY clearml_serving /root/clearml/clearml_serving -RUN pip3 install -r /root/clearml/clearml_serving/serving/requirements.txt +RUN pip3 install --no-cache-dir -r /root/clearml/clearml_serving/serving/requirements.txt # default serving port EXPOSE 8080 diff --git a/clearml_serving/serving/entrypoint.sh b/clearml_serving/serving/entrypoint.sh index 7d1b778..e1a5bbc 100755 --- a/clearml_serving/serving/entrypoint.sh +++ b/clearml_serving/serving/entrypoint.sh @@ -13,7 +13,7 @@ SERVING_PORT="${CLEARML_SERVING_PORT:-8080}" GUNICORN_NUM_PROCESS="${CLEARML_SERVING_NUM_PROCESS:-4}" GUNICORN_SERVING_TIMEOUT="${GUNICORN_SERVING_TIMEOUT:-600}" GUNICORN_MAX_REQUESTS="${GUNICORN_MAX_REQUESTS:-0}" -UVICORN_SERVE_LOOP="${UVICORN_SERVE_LOOP:-asyncio}" +UVICORN_SERVE_LOOP="${UVICORN_SERVE_LOOP:-uvloop}" UVICORN_LOG_LEVEL="${UVICORN_LOG_LEVEL:-warning}" # set default internal serve endpoint (for request pipelining) @@ -41,10 +41,18 @@ fi if [ -z "$CLEARML_USE_GUNICORN" ] then - echo "Starting Uvicorn server" - PYTHONPATH=$(pwd) python3 -m uvicorn \ - clearml_serving.serving.main:app --log-level $UVICORN_LOG_LEVEL --host 0.0.0.0 --port $SERVING_PORT --loop $UVICORN_SERVE_LOOP \ - $UVICORN_EXTRA_ARGS + if [ -z "$CLEARML_SERVING_NUM_PROCESS" ] + then + echo "Starting Uvicorn server - single worker" + PYTHONPATH=$(pwd) python3 -m uvicorn \ + clearml_serving.serving.main:app --log-level $UVICORN_LOG_LEVEL --host 0.0.0.0 --port $SERVING_PORT --loop $UVICORN_SERVE_LOOP \ + $UVICORN_EXTRA_ARGS + else + echo "Starting Uvicorn server - multi worker" + PYTHONPATH=$(pwd) python3 clearml_serving/serving/uvicorn_mp_entrypoint.py \ + clearml_serving.serving.main:app --log-level $UVICORN_LOG_LEVEL --host 0.0.0.0 --port $SERVING_PORT --loop $UVICORN_SERVE_LOOP \ + 
--workers $CLEARML_SERVING_NUM_PROCESS $UVICORN_EXTRA_ARGS + fi else echo "Starting Gunicorn server" # start service diff --git a/clearml_serving/statistics/Dockerfile b/clearml_serving/statistics/Dockerfile index e4e692d..4b430bc 100644 --- a/clearml_serving/statistics/Dockerfile +++ b/clearml_serving/statistics/Dockerfile @@ -4,13 +4,13 @@ FROM python:3.9-bullseye ENV LC_ALL=C.UTF-8 # install base package -RUN pip3 install clearml-serving +RUN pip3 install --no-cache-dir clearml-serving # get latest execution code from the git repository # RUN cd $HOME && git clone https://github.com/allegroai/clearml-serving.git COPY clearml_serving /root/clearml/clearml_serving -RUN pip3 install -r /root/clearml/clearml_serving/statistics/requirements.txt +RUN pip3 install --no-cache-dir -r /root/clearml/clearml_serving/statistics/requirements.txt # default serving port EXPOSE 9999 diff --git a/docker/docker-compose-triton-gpu.yml b/docker/docker-compose-triton-gpu.yml index 16d9908..74bc460 100644 --- a/docker/docker-compose-triton-gpu.yml +++ b/docker/docker-compose-triton-gpu.yml @@ -78,6 +78,9 @@ services: image: allegroai/clearml-serving-inference:latest container_name: clearml-serving-inference restart: unless-stopped + # optimize performance + security_opt: + - seccomp:unconfined ports: - "8080:8080" environment: @@ -105,6 +108,9 @@ services: image: allegroai/clearml-serving-triton:latest container_name: clearml-serving-triton restart: unless-stopped + # optimize performance + security_opt: + - seccomp:unconfined # ports: # - "8001:8001" environment: @@ -130,6 +136,9 @@ services: image: allegroai/clearml-serving-statistics:latest container_name: clearml-serving-statistics restart: unless-stopped + # optimize performance + security_opt: + - seccomp:unconfined # ports: # - "9999:9999" environment: diff --git a/docker/docker-compose-triton.yml b/docker/docker-compose-triton.yml index eb4ea4a..fd97daf 100644 --- a/docker/docker-compose-triton.yml +++ 
b/docker/docker-compose-triton.yml @@ -78,6 +78,9 @@ services: image: allegroai/clearml-serving-inference:latest container_name: clearml-serving-inference restart: unless-stopped + # optimize performance + security_opt: + - seccomp:unconfined ports: - "8080:8080" environment: @@ -105,6 +108,9 @@ services: image: allegroai/clearml-serving-triton:latest container_name: clearml-serving-triton restart: unless-stopped + # optimize performance + security_opt: + - seccomp:unconfined # ports: # - "8001:8001" environment: @@ -125,6 +131,9 @@ services: image: allegroai/clearml-serving-statistics:latest container_name: clearml-serving-statistics restart: unless-stopped + # optimize performance + security_opt: + - seccomp:unconfined # ports: # - "9999:9999" environment: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index fc42033..15a9ae9 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -78,6 +78,9 @@ services: image: allegroai/clearml-serving-inference:latest container_name: clearml-serving-inference restart: unless-stopped + # optimize performance + security_opt: + - seccomp:unconfined ports: - "8080:8080" environment: @@ -104,6 +107,9 @@ services: image: allegroai/clearml-serving-statistics:latest container_name: clearml-serving-statistics restart: unless-stopped + # optimize performance + security_opt: + - seccomp:unconfined # ports: # - "9999:9999" environment: diff --git a/examples/huggingface/docker-compose-override.yml b/examples/huggingface/docker-compose-override.yml new file mode 100644 index 0000000..69b1cd9 --- /dev/null +++ b/examples/huggingface/docker-compose-override.yml @@ -0,0 +1,3 @@ +services: + clearml-serving-triton: + image: allegroai/clearml-serving-triton:1.2.0-22.07 \ No newline at end of file