From f4bf7773a6a766a9c6d80b3c28cef6758dcc8f25 Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Tue, 26 Dec 2023 02:11:22 +0100
Subject: [PATCH 1/4] Update run-compose.sh

---
 run-compose.sh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/run-compose.sh b/run-compose.sh
index 7c7ceb714..0557bce95 100755
--- a/run-compose.sh
+++ b/run-compose.sh
@@ -80,12 +80,12 @@ usage() {
     echo "  -h, --help                 Show this help message."
    echo ""
     echo "Examples:"
-    echo "  ./$0 --drop"
-    echo "  ./$0 --enable-gpu[count=1]"
-    echo "  ./$0 --enable-api[port=11435]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
+    echo "  $0 --drop"
+    echo "  $0 --enable-gpu[count=1]"
+    echo "  $0 --enable-api[port=11435]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
     echo ""
     echo "This script configures and runs a docker-compose setup with optional GPU support, API exposure, and web UI configuration."
     echo "About the gpu to use, the script automatically detects it using the "lspci" command."
@@ -234,4 +234,4 @@ else
     echo "Aborted."
 fi
 
-echo
\ No newline at end of file
+echo

From 54e89a451649bcbb41942c8ad9f00290ff3e7f85 Mon Sep 17 00:00:00 2001
From: Daniele Viti
Date: Tue, 26 Dec 2023 02:28:45 +0100
Subject: [PATCH 2/4] Restored docker compose configuration

Also added the override for enabling GPU and better explained the OS and
hardware limitations
---
 README.md | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index aea5c8a51..be6b508c5 100644
--- a/README.md
+++ b/README.md
@@ -71,23 +71,40 @@ Don't forget to explore our sibling project, [OllamaHub](https://ollamahub.com/)
 
 ## How to Install 🚀
 
-### Installing Both Ollama and Ollama Web UI Using Docker Compose
+### Installing Both Ollama and Ollama Web UI Using the Provided run-compose.sh Bash Script
+This also works on Windows inside any Docker-enabled WSL2 Linux distro (you have to enable WSL integration from Docker Desktop).
 
-If you don't have Ollama installed yet, you can use the provided bash script for a hassle-free installation. Simply run the following command:
-
-For cpu-only container
+Simply run the following commands.
+Grant execute permission to the script:
 ```bash
-chmod +x run-compose.sh && ./run-compose.sh
+chmod +x run-compose.sh
 ```
 
-For gpu-enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
+For a CPU-only container:
 ```bash
-chmod +x run-compose.sh && ./run-compose.sh --enable-gpu[count=1]
+./run-compose.sh
+```
+
+For a GPU-enabled container (this requires a GPU-capable Docker runtime; it mostly works with NVIDIA, so see the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)):
+Warning! A GPU-enabled installation has only been tested on Linux with NVIDIA GPUs; full functionality is not guaranteed on Windows, macOS, or with a different GPU.
+```bash
+./run-compose.sh --enable-gpu
 ```
 
 Note that both the above commands will use the latest production docker image in repository, to be able to build the latest local version you'll need to append the `--build` parameter, for example:
 ```bash
-./run-compose.sh --build --enable-gpu[count=1]
+./run-compose.sh --enable-gpu --build
+```
+
+### Installing Both Ollama and Ollama Web UI Using Docker Compose
+To install with Docker Compose as a CPU-only installation, simply run this command:
+```bash
+docker compose up -d
+```
+
+For a GPU-enabled installation (provided you have installed the necessary GPU drivers and are using NVIDIA):
+```bash
+docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up -d
 ```
 
 ### Installing Both Ollama and Ollama Web UI Using Kustomize

From 02ce0c47f48fa9cd86dbc8266457f75e982ba147 Mon Sep 17 00:00:00 2001
From: Kenneth Bingham
Date: Wed, 27 Dec 2023 19:49:51 -0500
Subject: [PATCH 3/4] let ingress be enabled by default

---
 kubernetes/helm/templates/webui-ingress.yaml | 9 ++++++---
 kubernetes/helm/values.yaml                  | 4 ++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/kubernetes/helm/templates/webui-ingress.yaml b/kubernetes/helm/templates/webui-ingress.yaml
index 3970825d1..84f819f37 100644
--- a/kubernetes/helm/templates/webui-ingress.yaml
+++ b/kubernetes/helm/templates/webui-ingress.yaml
@@ -1,11 +1,13 @@
+{{- if .Values.webui.ingress.enabled }}
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   name: ollama-webui-ingress
   namespace: {{ .Values.namespace }}
-  #annotations:
-    # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
-    # nginx.ingress.kubernetes.io/rewrite-target: /
+{{- if .Values.webui.ingress.annotations }}
+  annotations:
+{{ toYaml .Values.webui.ingress.annotations | trimSuffix "\n" | indent 4 }}
+{{- end }}
 spec:
   rules:
   - host: {{ .Values.webui.ingress.host }}
@@ -18,3 +20,4 @@ spec:
             name: ollama-webui-service
             port:
               number: {{ .Values.webui.servicePort }}
+{{- end }}
diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml
index bfdf15257..f115f82fe 100644
--- a/kubernetes/helm/values.yaml
+++ b/kubernetes/helm/values.yaml
@@ -20,4 +20,8 @@ webui:
       cpu: "500m"
       memory: "500Mi"
   ingress:
+    enabled: true
+    annotations:
+      # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
+      # nginx.ingress.kubernetes.io/rewrite-target: /
     host: ollama.minikube.local
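
With ingress enabled by default and annotations now rendered from values (patch 3 above), a user-supplied override might look like the sketch below; the file name `my-values.yaml` and the uncommented NGINX annotation are illustrative assumptions, not part of the patch:

```yaml
# my-values.yaml - hypothetical override file, not part of this patch series
webui:
  ingress:
    enabled: true          # default after this patch; set to false to skip rendering the Ingress
    host: ollama.minikube.local
    annotations:
      # rendered under metadata.annotations by the template change above
      nginx.ingress.kubernetes.io/rewrite-target: /
```

Assuming the chart directory installs with plain Helm, this could be applied with something like `helm upgrade --install ollama-webui ./kubernetes/helm -f my-values.yaml`.
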
From b42b2e7890b403d798628dbe3089c2887d3c0fa9 Mon Sep 17 00:00:00 2001
From: braveokafor
Date: Thu, 28 Dec 2023 16:28:09 +0100
Subject: [PATCH 4/4] Added nodeSelectors for allocating GPU nodePools in the cloud and configured volumes for WebUI

---
 kubernetes/helm/templates/ollama-service.yaml |  1 +
 .../helm/templates/ollama-statefulset.yaml    | 29 +++++++++++++++----
 .../helm/templates/webui-deployment.yaml      | 22 ++++++++++----
 kubernetes/helm/templates/webui-pvc.yaml      | 12 ++++++++
 kubernetes/helm/templates/webui-service.yaml  |  2 +-
 kubernetes/helm/values.yaml                   | 11 +++++++
 6 files changed, 64 insertions(+), 13 deletions(-)
 create mode 100644 kubernetes/helm/templates/webui-pvc.yaml

diff --git a/kubernetes/helm/templates/ollama-service.yaml b/kubernetes/helm/templates/ollama-service.yaml
index afa25e38b..54558473a 100644
--- a/kubernetes/helm/templates/ollama-service.yaml
+++ b/kubernetes/helm/templates/ollama-service.yaml
@@ -4,6 +4,7 @@ metadata:
   name: ollama-service
   namespace: {{ .Values.namespace }}
 spec:
+  type: {{ .Values.ollama.service.type }}
   selector:
     app: ollama
   ports:
diff --git a/kubernetes/helm/templates/ollama-statefulset.yaml b/kubernetes/helm/templates/ollama-statefulset.yaml
index 755ed008a..83cb6883f 100644
--- a/kubernetes/helm/templates/ollama-statefulset.yaml
+++ b/kubernetes/helm/templates/ollama-statefulset.yaml
@@ -19,15 +19,32 @@ spec:
         image: {{ .Values.ollama.image }}
         ports:
         - containerPort: {{ .Values.ollama.servicePort }}
-        resources:
-          limits:
-            cpu: {{ .Values.ollama.resources.limits.cpu }}
-            memory: {{ .Values.ollama.resources.limits.memory }}
-            nvidia.com/gpu: {{ .Values.ollama.resources.limits.gpu }}
+        env:
+        {{- if .Values.ollama.gpu.enabled }}
+        - name: PATH
+          value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+        - name: LD_LIBRARY_PATH
+          value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
+        - name: NVIDIA_DRIVER_CAPABILITIES
+          value: compute,utility
+        {{- end}}
+        {{- if .Values.ollama.resources }}
+        resources: {{- toYaml .Values.ollama.resources | nindent 10 }}
+        {{- end }}
         volumeMounts:
         - name: ollama-volume
           mountPath: /root/.ollama
         tty: true
+      {{- with .Values.ollama.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      tolerations:
+        {{- if .Values.ollama.gpu.enabled }}
+        - key: nvidia.com/gpu
+          operator: Exists
+          effect: NoSchedule
+        {{- end }}
   volumeClaimTemplates:
   - metadata:
       name: ollama-volume
@@ -35,4 +52,4 @@ spec:
       accessModes: [ "ReadWriteOnce" ]
       resources:
         requests:
-          storage: 1Gi
\ No newline at end of file
+          storage: {{ .Values.ollama.volumeSize }}
\ No newline at end of file
diff --git a/kubernetes/helm/templates/webui-deployment.yaml b/kubernetes/helm/templates/webui-deployment.yaml
index ec4fc79f4..d9721ee05 100644
--- a/kubernetes/helm/templates/webui-deployment.yaml
+++ b/kubernetes/helm/templates/webui-deployment.yaml
@@ -15,14 +15,24 @@ spec:
     spec:
       containers:
       - name: ollama-webui
-        image: ghcr.io/ollama-webui/ollama-webui:main
+        image: {{ .Values.webui.image }}
        ports:
        - containerPort: 8080
-        resources:
-          limits:
-            cpu: "500m"
-            memory: "500Mi"
+        {{- if .Values.webui.resources }}
+        resources: {{- toYaml .Values.webui.resources | nindent 10 }}
+        {{- end }}
+        volumeMounts:
+        - name: webui-volume
+          mountPath: /app/backend/data
         env:
         - name: OLLAMA_API_BASE_URL
           value: "http://ollama-service.{{ .Values.namespace }}.svc.cluster.local:{{ .Values.ollama.servicePort }}/api"
         tty: true
+      {{- with .Values.webui.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      volumes:
+      - name: webui-volume
+        persistentVolumeClaim:
+          claimName: ollama-webui-pvc
\ No newline at end of file
diff --git a/kubernetes/helm/templates/webui-pvc.yaml b/kubernetes/helm/templates/webui-pvc.yaml
new file mode 100644
index 000000000..e9961aa8d
--- /dev/null
+++ b/kubernetes/helm/templates/webui-pvc.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  labels:
+    app: ollama-webui
+  name: ollama-webui-pvc
+  namespace: {{ .Values.namespace }}
+spec:
+  accessModes: [ "ReadWriteOnce" ]
+  resources:
+    requests:
+      storage: {{ .Values.webui.volumeSize }}
\ No newline at end of file
diff --git a/kubernetes/helm/templates/webui-service.yaml b/kubernetes/helm/templates/webui-service.yaml
index dd6058558..7fefa4fd4 100644
--- a/kubernetes/helm/templates/webui-service.yaml
+++ b/kubernetes/helm/templates/webui-service.yaml
@@ -4,7 +4,7 @@ metadata:
   name: ollama-webui-service
   namespace: {{ .Values.namespace }}
 spec:
-  type: NodePort # Use LoadBalancer if you're on a cloud that supports it
+  type: {{ .Values.webui.service.type }} # Default: NodePort; use LoadBalancer if you're on a cloud that supports it
   selector:
     app: ollama-webui
   ports:
diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml
index f115f82fe..648b40509 100644
--- a/kubernetes/helm/values.yaml
+++ b/kubernetes/helm/values.yaml
@@ -10,6 +10,12 @@ ollama:
       memory: "2Gi"
       nvidia.com/gpu: "0"
   volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: ClusterIP
+  gpu:
+    enabled: false
 
 webui:
   replicaCount: 1
@@ -25,3 +31,8 @@ webui:
     # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
     # nginx.ingress.kubernetes.io/rewrite-target: /
     host: ollama.minikube.local
+  volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: NodePort
\ No newline at end of file
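
Taken together, patch 4's new values (`ollama.gpu.enabled`, `ollama.nodeSelector`, the service types, and the volume sizes) could be combined for a cloud GPU node pool roughly as sketched below; the GKE accelerator label, the sizes, and the file name are illustrative assumptions rather than values taken from the patches:

```yaml
# gpu-values.yaml - illustrative override, assuming a GKE-style GPU node pool
ollama:
  gpu:
    enabled: true              # adds the NVIDIA env vars and the nvidia.com/gpu toleration
  nodeSelector:
    cloud.google.com/gke-accelerator: nvidia-tesla-t4   # hypothetical node label; use your provider's
  resources:
    limits:
      nvidia.com/gpu: "1"      # merged over the chart default of "0"
  volumeSize: 30Gi
webui:
  volumeSize: 2Gi
  service:
    type: LoadBalancer         # NodePort remains the chart default
```

The nodeSelector is rendered only when non-empty (via `with`), and the GPU toleration is emitted only when `gpu.enabled` is true, so a CPU-only install with the default values is unaffected.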