From cd91d8a987518ac1a050f24489ee9fc7c4e3c9a7 Mon Sep 17 00:00:00 2001 From: Jun Siang Cheah Date: Wed, 10 Apr 2024 13:39:46 +0100 Subject: [PATCH] run apt install first for better potential layer caching --- Dockerfile | 64 +++++++++++++++++++++++++++--------------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/Dockerfile b/Dockerfile index 6b44afd3f..6cf8ded28 100644 --- a/Dockerfile +++ b/Dockerfile @@ -67,43 +67,43 @@ ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \ #### Other models ########################################################## WORKDIR /app/backend + +RUN if [ "$USE_OLLAMA" = "true" ]; then \ + apt-get update && \ + # Install pandoc and netcat + apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ + # for RAG OCR + apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ + # install helper tools + apt-get install -y --no-install-recommends curl && \ + # install ollama + curl -fsSL https://ollama.com/install.sh | sh && \ + # cleanup + rm -rf /var/lib/apt/lists/*; \ + else \ + apt-get update && \ + # Install pandoc and netcat + apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ + # for RAG OCR + apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ + # cleanup + rm -rf /var/lib/apt/lists/*; \ + fi + # install python dependencies COPY ./backend/requirements.txt ./requirements.txt RUN if [ "$USE_CUDA" = "true" ]; then \ - # If you use CUDA the whisper and embedding modell will be downloaded on first use - pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \ - pip3 install -r requirements.txt --no-cache-dir && \ - python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ - python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ + # If you use CUDA the whisper and embedding model will be downloaded on first use + pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \ + pip3 install -r requirements.txt --no-cache-dir && \ + python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ + python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ else \ - pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \ - pip3 install -r requirements.txt --no-cache-dir && \ - python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ - python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ - fi - - -RUN if [ "$USE_OLLAMA" = "true" ]; then \ - apt-get update && \ - # Install pandoc and netcat - apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ - # for RAG OCR - apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ - # install helper tools - apt-get install -y --no-install-recommends curl && \ - # install ollama - curl -fsSL https://ollama.com/install.sh | sh && \ - # cleanup - rm -rf /var/lib/apt/lists/*; \ - else \ - apt-get update && \ - # Install pandoc and netcat - apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ - # for RAG OCR - apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ - # cleanup - rm -rf /var/lib/apt/lists/*; \ + pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \ + pip3 install -r requirements.txt --no-cache-dir && \ + python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ + python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ fi