Mirror of https://github.com/open-webui/open-webui (synced 2025-06-26 18:26:48 +00:00)

fixes and updates

Commit 9bcb37ea10

.github/workflows/docker-build.yaml (36 changes, vendored)
							| @ -1,5 +1,4 @@ | ||||
| # | ||||
| name: Create and publish a Docker image | ||||
| name: Create and publish Docker images with specific build args | ||||
| 
 | ||||
| # Configures this workflow to run every time a change is pushed to the branch called `release`. | ||||
| on: | ||||
| @ -23,7 +22,7 @@ jobs: | ||||
|     permissions: | ||||
|       contents: read | ||||
|       packages: write | ||||
|       # | ||||
| 
 | ||||
|     steps: | ||||
|       - name: Checkout repository | ||||
|         uses: actions/checkout@v4 | ||||
| @ -41,8 +40,8 @@ jobs: | ||||
|           username: ${{ github.actor }} | ||||
|           password: ${{ secrets.GITHUB_TOKEN }} | ||||
| 
 | ||||
|       - name: Extract metadata for Docker images | ||||
|         id: meta | ||||
|       - name: Extract metadata for Docker images (default latest tag) | ||||
|         id: meta-latest | ||||
|         uses: docker/metadata-action@v5 | ||||
|         with: | ||||
|           images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} | ||||
| @ -53,14 +52,31 @@ jobs: | ||||
|             type=sha,prefix=git- | ||||
|             type=semver,pattern={{version}} | ||||
|             type=semver,pattern={{major}}.{{minor}} | ||||
|           flavor: | | ||||
|             latest=${{ github.ref == 'refs/heads/main' }} | ||||
|             latest=true | ||||
| 
 | ||||
|       - name: Build and push Docker image | ||||
|       - name: Build and push Docker image (latest) | ||||
|         uses: docker/build-push-action@v5 | ||||
|         with: | ||||
|           context: . | ||||
|           push: true | ||||
|           platforms: linux/amd64,linux/arm64 | ||||
|           tags: ${{ steps.meta.outputs.tags }} | ||||
|           labels: ${{ steps.meta.outputs.labels }} | ||||
|           tags: ${{ steps.meta-latest.outputs.tags }} | ||||
|           labels: ${{ steps.meta-latest.outputs.labels }} | ||||
| 
 | ||||
|       - name: Build and push Docker image with CUDA | ||||
|         uses: docker/build-push-action@v5 | ||||
|         with: | ||||
|           context: . | ||||
|           push: true | ||||
|           platforms: linux/amd64,linux/arm64 | ||||
|           tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:cuda | ||||
|           build-args: USE_CUDA=true | ||||
| 
 | ||||
|       - name: Build and push Docker image with Ollama | ||||
|         uses: docker/build-push-action@v5 | ||||
|         with: | ||||
|           context: . | ||||
|           push: true | ||||
|           platforms: linux/amd64,linux/arm64 | ||||
|           tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:with-ollama | ||||
|           build-args: USE_OLLAMA=true | ||||
|  | ||||
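The updated workflow publishes three image variants instead of one: the default image (git/semver tags plus latest), a :cuda image built with USE_CUDA=true, and a :with-ollama image built with USE_OLLAMA=true. A minimal local equivalent of those builds, assuming you run it from the repository root (the image names below are placeholders, not the tags the workflow pushes to the registry):

    # default (CPU) image
    docker build -t open-webui:latest .
    # CUDA-enabled image: torch is installed from the CUDA wheel index
    docker build --build-arg="USE_CUDA=true" -t open-webui:cuda .
    # bundled-Ollama image: the Ollama install script runs at build time
    docker build --build-arg="USE_OLLAMA=true" -t open-webui:with-ollama .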
							
								
								
									
Dockerfile (129 changes)
							| @ -1,77 +1,110 @@ | ||||
| # syntax=docker/dockerfile:1 | ||||
| # Initialize device type args | ||||
| # use build args in the docker build command with --build-arg="BUILDARG=true" | ||||
| ARG USE_CUDA=false | ||||
| ARG USE_OLLAMA=false | ||||
| # Tested with cu117 for CUDA 11 and cu121 for CUDA 12 (default)  | ||||
| ARG USE_CUDA_VER=cu121 | ||||
| # any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers | ||||
| # Leaderboard: https://huggingface.co/spaces/mteb/leaderboard  | ||||
| # for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB) | ||||
| # IMPORTANT: If you change the default model (all-MiniLM-L6-v2), you won't be able to use RAG Chat with documents already loaded in the WebUI until you re-embed them. | ||||
| ARG USE_EMBEDDING_MODEL=all-MiniLM-L6-v2 | ||||
| 
 | ||||
| FROM node:alpine as build | ||||
| ######## WebUI frontend ######## | ||||
| FROM node:21-alpine3.19 as build | ||||
| 
 | ||||
| WORKDIR /app | ||||
| 
 | ||||
| # wget embedding model weight from alpine (does not exist from slim-buster) | ||||
| RUN wget "https://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz" -O - | \ | ||||
|     tar -xzf - -C /app | ||||
| 
 | ||||
| COPY package.json package-lock.json ./ | ||||
| RUN npm ci | ||||
| 
 | ||||
| COPY . . | ||||
| RUN npm run build | ||||
| 
 | ||||
| 
 | ||||
| ######## WebUI backend ######## | ||||
| FROM python:3.11-slim-bookworm as base | ||||
| 
 | ||||
| ENV ENV=prod | ||||
| ENV PORT "" | ||||
| # Use args | ||||
| ARG USE_CUDA | ||||
| ARG USE_OLLAMA | ||||
| ARG USE_CUDA_VER | ||||
| ARG USE_EMBEDDING_MODEL | ||||
| 
 | ||||
| ENV OLLAMA_BASE_URL "/ollama" | ||||
| ## Base ## | ||||
| ENV ENV=prod \ | ||||
|     PORT=8080 \ | ||||
|     # pass build args to the build | ||||
|     USE_OLLAMA_DOCKER=${USE_OLLAMA} \ | ||||
|     USE_CUDA_DOCKER=${USE_CUDA} \ | ||||
|     USE_CUDA_DOCKER_VER=${USE_CUDA_VER} \ | ||||
|     USE_EMBEDDING_MODEL_DOCKER=${USE_EMBEDDING_MODEL} | ||||
| 
 | ||||
| ENV OPENAI_API_BASE_URL "" | ||||
| ENV OPENAI_API_KEY "" | ||||
| ## Base URL Config ## | ||||
| ENV OLLAMA_BASE_URL="/ollama" \ | ||||
|     OPENAI_API_BASE_URL="" | ||||
| 
 | ||||
| ENV WEBUI_SECRET_KEY "" | ||||
| ## API Key and Security Config ## | ||||
| ENV OPENAI_API_KEY="" \ | ||||
|     WEBUI_SECRET_KEY="" \ | ||||
|     SCARF_NO_ANALYTICS=true \ | ||||
|     DO_NOT_TRACK=true | ||||
| 
 | ||||
| ENV SCARF_NO_ANALYTICS true | ||||
| ENV DO_NOT_TRACK true | ||||
| #### Other models ######################################################### | ||||
| ## whisper TTS model settings ## | ||||
| ENV WHISPER_MODEL="base" \ | ||||
|     WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models" | ||||
| 
 | ||||
| ######## Preloaded models ######## | ||||
| # whisper TTS Settings | ||||
| ENV WHISPER_MODEL="base" | ||||
| ENV WHISPER_MODEL_DIR="/app/backend/data/cache/whisper/models" | ||||
| 
 | ||||
| # RAG Embedding Model Settings | ||||
| # any sentence transformer model; models to use can be found at https://huggingface.co/models?library=sentence-transformers | ||||
| # Leaderboard: https://huggingface.co/spaces/mteb/leaderboard  | ||||
| # for better performance and multilingual support use "intfloat/multilingual-e5-large" (~2.5GB) or "intfloat/multilingual-e5-base" (~1.5GB) | ||||
| # IMPORTANT: If you change the default model (all-MiniLM-L6-v2), you won't be able to use RAG Chat with documents already loaded in the WebUI until you re-embed them. | ||||
| ENV RAG_EMBEDDING_MODEL="all-MiniLM-L6-v2" | ||||
| # device type for whisper TTS and embedding models - "cpu" (default), "cuda" (NVIDIA GPU and CUDA required) or "mps" (Apple silicon) - choosing the right one can lead to better performance | ||||
| ENV RAG_EMBEDDING_MODEL_DEVICE_TYPE="cpu" | ||||
| ENV RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" | ||||
| ENV SENTENCE_TRANSFORMERS_HOME $RAG_EMBEDDING_MODEL_DIR | ||||
| 
 | ||||
| ######## Preloaded models ######## | ||||
| ## RAG Embedding model settings ## | ||||
| ENV RAG_EMBEDDING_MODEL="$USE_EMBEDDING_MODEL_DOCKER" \ | ||||
|     RAG_EMBEDDING_MODEL_DIR="/app/backend/data/cache/embedding/models" \ | ||||
|     SENTENCE_TRANSFORMERS_HOME="/app/backend/data/cache/embedding/models" | ||||
| #### Other models ########################################################## | ||||
| 
 | ||||
| WORKDIR /app/backend | ||||
| 
 | ||||
| # install python dependencies | ||||
| COPY ./backend/requirements.txt ./requirements.txt | ||||
| 
 | ||||
| RUN apt-get update && apt-get install ffmpeg libsm6 libxext6  -y | ||||
| RUN if [ "$USE_CUDA" = "true" ]; then \ | ||||
|         # If you use CUDA, the whisper and embedding models will be downloaded on first use | ||||
|         pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \ | ||||
|         pip3 install -r requirements.txt --no-cache-dir && \ | ||||
|         python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])"; \ | ||||
|     else \ | ||||
|         pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \ | ||||
|         pip3 install -r requirements.txt --no-cache-dir && \ | ||||
|         python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ | ||||
|         python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ | ||||
|     fi | ||||
| 
 | ||||
| RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir | ||||
| RUN pip3 install -r requirements.txt --no-cache-dir | ||||
| 
 | ||||
| # Install pandoc and netcat | ||||
| # RUN python -c "import pypandoc; pypandoc.download_pandoc()" | ||||
| RUN apt-get update \ | ||||
|     && apt-get install -y pandoc netcat-openbsd \ | ||||
|     && rm -rf /var/lib/apt/lists/* | ||||
| RUN if [ "$USE_OLLAMA" = "true" ]; then \ | ||||
|         apt-get update && \ | ||||
|         # Install pandoc and netcat | ||||
|         apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ | ||||
|         # for RAG OCR | ||||
|         apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ | ||||
|         # install helper tools | ||||
|         apt-get install -y --no-install-recommends curl && \ | ||||
|         # install ollama | ||||
|         curl -fsSL https://ollama.com/install.sh | sh && \ | ||||
|         # cleanup | ||||
|         rm -rf /var/lib/apt/lists/*; \ | ||||
|     else \ | ||||
|         apt-get update && \ | ||||
|         # Install pandoc and netcat | ||||
|         apt-get install -y --no-install-recommends pandoc netcat-openbsd && \ | ||||
|         # for RAG OCR | ||||
|         apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \ | ||||
|         # cleanup | ||||
|         rm -rf /var/lib/apt/lists/*; \ | ||||
|     fi | ||||
| 
 | ||||
| 
 | ||||
| # preload embedding model | ||||
| RUN python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device=os.environ['RAG_EMBEDDING_MODEL_DEVICE_TYPE'])" | ||||
| # preload tts model | ||||
| RUN python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='auto', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" | ||||
| 
 | ||||
| # copy embedding weight from build | ||||
| RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 | ||||
| COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx | ||||
| # RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2 | ||||
| # COPY --from=build /app/onnx /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2/onnx | ||||
| 
 | ||||
| # copy built frontend files | ||||
| COPY --from=build /app/build /app/build | ||||
| @ -81,4 +114,6 @@ COPY --from=build /app/package.json /app/package.json | ||||
| # copy backend files | ||||
| COPY ./backend . | ||||
| 
 | ||||
| CMD [ "bash", "start.sh"] | ||||
| EXPOSE 8080 | ||||
| 
 | ||||
| CMD [ "bash", "start.sh"] | ||||
| @ -28,6 +28,7 @@ from config import ( | ||||
|     UPLOAD_DIR, | ||||
|     WHISPER_MODEL, | ||||
|     WHISPER_MODEL_DIR, | ||||
|     DEVICE_TYPE, | ||||
| ) | ||||
| 
 | ||||
| log = logging.getLogger(__name__) | ||||
| @ -42,6 +43,10 @@ app.add_middleware( | ||||
|     allow_headers=["*"], | ||||
| ) | ||||
| 
 | ||||
| # setting device type for whisper model | ||||
| whisper_device_type = DEVICE_TYPE if DEVICE_TYPE and DEVICE_TYPE == "cuda" else "cpu" | ||||
| log.info(f"whisper_device_type: {whisper_device_type}") | ||||
| 
 | ||||
| 
 | ||||
| @app.post("/transcribe") | ||||
| def transcribe( | ||||
| @ -66,7 +71,7 @@ def transcribe( | ||||
| 
 | ||||
|         model = WhisperModel( | ||||
|             WHISPER_MODEL, | ||||
|             device="auto", | ||||
|             device=whisper_device_type, | ||||
|             compute_type="int8", | ||||
|             download_root=WHISPER_MODEL_DIR, | ||||
|         ) | ||||
|  | ||||
| @ -59,7 +59,7 @@ from config import ( | ||||
|     UPLOAD_DIR, | ||||
|     DOCS_DIR, | ||||
|     RAG_EMBEDDING_MODEL, | ||||
|     RAG_EMBEDDING_MODEL_DEVICE_TYPE, | ||||
|     DEVICE_TYPE, | ||||
|     CHROMA_CLIENT, | ||||
|     CHUNK_SIZE, | ||||
|     CHUNK_OVERLAP, | ||||
| @ -71,15 +71,6 @@ from constants import ERROR_MESSAGES | ||||
| log = logging.getLogger(__name__) | ||||
| log.setLevel(SRC_LOG_LEVELS["RAG"]) | ||||
| 
 | ||||
| # | ||||
| # if RAG_EMBEDDING_MODEL: | ||||
| #    sentence_transformer_ef = SentenceTransformer( | ||||
| #        model_name_or_path=RAG_EMBEDDING_MODEL, | ||||
| #        cache_folder=RAG_EMBEDDING_MODEL_DIR, | ||||
| #        device=RAG_EMBEDDING_MODEL_DEVICE_TYPE, | ||||
| #    ) | ||||
| 
 | ||||
| 
 | ||||
| app = FastAPI() | ||||
| 
 | ||||
| app.state.PDF_EXTRACT_IMAGES = False | ||||
| @ -92,7 +83,7 @@ app.state.TOP_K = 4 | ||||
| app.state.sentence_transformer_ef = ( | ||||
|     embedding_functions.SentenceTransformerEmbeddingFunction( | ||||
|         model_name=app.state.RAG_EMBEDDING_MODEL, | ||||
|         device=RAG_EMBEDDING_MODEL_DEVICE_TYPE, | ||||
|         device=DEVICE_TYPE, | ||||
|     ) | ||||
| ) | ||||
| 
 | ||||
| @ -147,10 +138,9 @@ async def update_embedding_model( | ||||
|     app.state.sentence_transformer_ef = ( | ||||
|         embedding_functions.SentenceTransformerEmbeddingFunction( | ||||
|             model_name=app.state.RAG_EMBEDDING_MODEL, | ||||
|             device=RAG_EMBEDDING_MODEL_DEVICE_TYPE, | ||||
|             device=DEVICE_TYPE, | ||||
|         ) | ||||
|     ) | ||||
| 
 | ||||
|     return { | ||||
|         "status": True, | ||||
|         "embedding_model": app.state.RAG_EMBEDDING_MODEL, | ||||
|  | ||||
| @ -253,6 +253,8 @@ OLLAMA_API_BASE_URL = os.environ.get( | ||||
| 
 | ||||
| OLLAMA_BASE_URL = os.environ.get("OLLAMA_BASE_URL", "") | ||||
| K8S_FLAG = os.environ.get("K8S_FLAG", "") | ||||
| USE_OLLAMA_DOCKER = os.environ.get("USE_OLLAMA_DOCKER", "false") | ||||
| 
 | ||||
| 
 | ||||
| if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "": | ||||
|     OLLAMA_BASE_URL = ( | ||||
| @ -263,7 +265,12 @@ if OLLAMA_BASE_URL == "" and OLLAMA_API_BASE_URL != "": | ||||
| 
 | ||||
| if ENV == "prod": | ||||
|     if OLLAMA_BASE_URL == "/ollama": | ||||
|         OLLAMA_BASE_URL = "http://host.docker.internal:11434" | ||||
|         if USE_OLLAMA_DOCKER.lower() == "true": | ||||
|             # if you use the all-in-one docker container (Open WebUI + Ollama) | ||||
|             # built with the docker build arg USE_OLLAMA=true (--build-arg="USE_OLLAMA=true"), Ollama is only reachable at http://localhost:11434 | ||||
|             OLLAMA_BASE_URL = "http://localhost:11434" | ||||
|         else:     | ||||
|             OLLAMA_BASE_URL = "http://host.docker.internal:11434" | ||||
| 
 | ||||
|     elif K8S_FLAG: | ||||
|         OLLAMA_BASE_URL = "http://ollama-service.open-webui.svc.cluster.local:11434" | ||||
| @ -384,10 +391,16 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "": | ||||
| CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db" | ||||
| # this uses the model defined in the Dockerfile ENV variable. If you don't use docker or docker-based deployments such as k8s, the default embedding model (all-MiniLM-L6-v2) will be used | ||||
| RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2") | ||||
| log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}"), | ||||
| # device type for embedding models - "cpu" (default), "cuda" (NVIDIA GPU required) or "mps" (Apple silicon) - choosing the right one can lead to better performance | ||||
| RAG_EMBEDDING_MODEL_DEVICE_TYPE = os.environ.get( | ||||
|     "RAG_EMBEDDING_MODEL_DEVICE_TYPE", "cpu" | ||||
| ) | ||||
| USE_CUDA = os.environ.get("USE_CUDA_DOCKER", "false") | ||||
| 
 | ||||
| if USE_CUDA.lower() == "true": | ||||
|     DEVICE_TYPE = "cuda" | ||||
| else: | ||||
|     DEVICE_TYPE = "cpu" | ||||
| 
 | ||||
| 
 | ||||
| CHROMA_CLIENT = chromadb.PersistentClient( | ||||
|     path=CHROMA_DATA_PATH, | ||||
|     settings=Settings(allow_reset=True, anonymized_telemetry=False), | ||||
|  | ||||
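The config hunk above resolves everything from environment variables at startup: USE_OLLAMA_DOCKER decides whether Ollama is expected at http://localhost:11434 (all-in-one image) or http://host.docker.internal:11434, and USE_CUDA_DOCKER collapses into a single DEVICE_TYPE shared by the audio and RAG apps. Because these are plain os.environ lookups, they can also be overridden when the container is started. An illustrative override, reusing the placeholder image name from the earlier sketches (on Linux, host.docker.internal additionally needs the --add-host mapping shown):

    docker run -d -p 3000:8080 \
      --add-host=host.docker.internal:host-gateway \
      -e OLLAMA_BASE_URL="http://host.docker.internal:11434" \
      -e RAG_EMBEDDING_MODEL="intfloat/multilingual-e5-base" \
      -v open-webui:/app/backend/data \
      open-webui:latest

As the Dockerfile comment notes, switching RAG_EMBEDDING_MODEL away from the model used previously means documents already loaded in the WebUI have to be re-embedded.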
| @ -7,16 +7,26 @@ KEY_FILE=.webui_secret_key | ||||
| 
 | ||||
| PORT="${PORT:-8080}" | ||||
| if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then | ||||
|   echo No WEBUI_SECRET_KEY provided | ||||
|   echo "No WEBUI_SECRET_KEY provided" | ||||
| 
 | ||||
|   if ! [ -e "$KEY_FILE" ]; then | ||||
|     echo Generating WEBUI_SECRET_KEY | ||||
|     echo "Generating WEBUI_SECRET_KEY" | ||||
|     # Generate a random value to use as a WEBUI_SECRET_KEY in case the user didn't provide one. | ||||
|     echo $(head -c 12 /dev/random | base64) > $KEY_FILE | ||||
|     echo $(head -c 12 /dev/random | base64) > "$KEY_FILE" | ||||
|   fi | ||||
| 
 | ||||
|   echo Loading WEBUI_SECRET_KEY from $KEY_FILE | ||||
|   WEBUI_SECRET_KEY=`cat $KEY_FILE` | ||||
|   echo "Loading WEBUI_SECRET_KEY from $KEY_FILE" | ||||
|   WEBUI_SECRET_KEY=$(cat "$KEY_FILE") | ||||
| fi | ||||
| 
 | ||||
| WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*' | ||||
| if [ "$USE_OLLAMA_DOCKER" = "true" ]; then | ||||
|     echo "USE_OLLAMA is set to true, starting ollama serve." | ||||
|     ollama serve & | ||||
| fi | ||||
| 
 | ||||
| if [ "$USE_CUDA_DOCKER" = "true" ]; then | ||||
|   echo "CUDA is enabled, appending LD_LIBRARY_PATH to include torch/cudnn & cublas libraries." | ||||
|   export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cublas/lib" | ||||
| fi | ||||
| 
 | ||||
| WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" exec uvicorn main:app --host 0.0.0.0 --port "$PORT" --forwarded-allow-ips '*' | ||||
|  | ||||
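The updated start.sh starts ollama serve in the background when USE_OLLAMA_DOCKER=true and extends LD_LIBRARY_PATH with the bundled torch/cuDNN and cuBLAS libraries when USE_CUDA_DOCKER=true, before handing off to uvicorn as before. For the all-in-one image this suggests a run command along the following lines (image name as in the build sketch; mounting /root/.ollama is an assumption about where Ollama keeps its downloaded models, so they survive container restarts):

    docker run -d -p 3000:8080 \
      -v ollama:/root/.ollama \
      -v open-webui:/app/backend/data \
      open-webui:with-ollama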