diff --git a/README.md b/README.md
index ad4b2bbb5..6c7d7e60c 100644
--- a/README.md
+++ b/README.md
@@ -85,17 +85,29 @@ If you don't have Ollama installed yet, you can use the provided bash script for
 
 For cpu-only container
 ```bash
-chmod +x run-compose.sh && ./run-compose.sh
+./run-compose.sh
 ```
 
-For gpu-enabled container (to enable this you must have your gpu driver for docker, it mostly works with nvidia so this is the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
+For a GPU-enabled container (this requires a GPU driver set up for Docker; it mostly works with NVIDIA, so follow the official install guide: [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html))
+Warning! A GPU-enabled installation has only been tested on Linux with an NVIDIA GPU; full functionality is not guaranteed on Windows, macOS, or with other GPUs.
 ```bash
-chmod +x run-compose.sh && ./run-compose.sh --enable-gpu[count=1]
+./run-compose.sh --enable-gpu
 ```
 
 Note that both the above commands will use the latest production docker image in repository, to be able to build the latest local version you'll need to append the `--build` parameter, for example:
 ```bash
-./run-compose.sh --build --enable-gpu[count=1]
+./run-compose.sh --enable-gpu --build
+```
+
+### Installing Both Ollama and Ollama Web UI Using Docker Compose
+For a CPU-only installation with Docker Compose, simply run this command:
+```bash
+docker compose up -d
+```
+
+For a GPU-enabled installation (provided you have installed the necessary GPU drivers and are using NVIDIA):
+```bash
+docker compose -f docker-compose.yaml -f docker-compose.gpu.yaml up -d
 ```
 
 ### Installing Both Ollama and Ollama Web UI Using Kustomize
diff --git a/kubernetes/helm/templates/ollama-service.yaml b/kubernetes/helm/templates/ollama-service.yaml
index afa25e38b..54558473a 100644
--- a/kubernetes/helm/templates/ollama-service.yaml
+++ b/kubernetes/helm/templates/ollama-service.yaml
@@ -4,6 +4,7 @@ metadata:
   name: ollama-service
   namespace: {{ .Values.namespace }}
 spec:
+  type: {{ .Values.ollama.service.type }}
   selector:
     app: ollama
   ports:
diff --git a/kubernetes/helm/templates/ollama-statefulset.yaml b/kubernetes/helm/templates/ollama-statefulset.yaml
index 755ed008a..83cb6883f 100644
--- a/kubernetes/helm/templates/ollama-statefulset.yaml
+++ b/kubernetes/helm/templates/ollama-statefulset.yaml
@@ -19,15 +19,32 @@ spec:
         image: {{ .Values.ollama.image }}
         ports:
         - containerPort: {{ .Values.ollama.servicePort }}
-        resources:
-          limits:
-            cpu: {{ .Values.ollama.resources.limits.cpu }}
-            memory: {{ .Values.ollama.resources.limits.memory }}
-            nvidia.com/gpu: {{ .Values.ollama.resources.limits.gpu }}
+        env:
+        {{- if .Values.ollama.gpu.enabled }}
+        - name: PATH
+          value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+        - name: LD_LIBRARY_PATH
+          value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
+        - name: NVIDIA_DRIVER_CAPABILITIES
+          value: compute,utility
+        {{- end}}
+        {{- if .Values.ollama.resources }}
+        resources: {{- toYaml .Values.ollama.resources | nindent 10 }}
+        {{- end }}
         volumeMounts:
         - name: ollama-volume
           mountPath: /root/.ollama
         tty: true
+      {{- with .Values.ollama.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      tolerations:
+        {{- if .Values.ollama.gpu.enabled }}
+        - key: nvidia.com/gpu
+          operator: Exists
+          effect: NoSchedule
+        {{- end }}
   volumeClaimTemplates:
   - metadata:
       name: ollama-volume
@@ -35,4 +52,4 @@ spec:
       accessModes: [ "ReadWriteOnce" ]
       resources:
         requests:
-          storage: 1Gi
\ No newline at end of file
+          storage: {{ .Values.ollama.volumeSize }}
\ No newline at end of file
diff --git a/kubernetes/helm/templates/webui-deployment.yaml b/kubernetes/helm/templates/webui-deployment.yaml
index ec4fc79f4..d9721ee05 100644
--- a/kubernetes/helm/templates/webui-deployment.yaml
+++ b/kubernetes/helm/templates/webui-deployment.yaml
@@ -15,14 +15,24 @@ spec:
     spec:
      containers:
      - name: ollama-webui
-        image: ghcr.io/ollama-webui/ollama-webui:main
+        image: {{ .Values.webui.image }}
        ports:
        - containerPort: 8080
-        resources:
-          limits:
-            cpu: "500m"
-            memory: "500Mi"
+        {{- if .Values.webui.resources }}
+        resources: {{- toYaml .Values.webui.resources | nindent 10 }}
+        {{- end }}
+        volumeMounts:
+        - name: webui-volume
+          mountPath: /app/backend/data
        env:
        - name: OLLAMA_API_BASE_URL
          value: "http://ollama-service.{{ .Values.namespace }}.svc.cluster.local:{{ .Values.ollama.servicePort }}/api"
-        tty: true
\ No newline at end of file
+        tty: true
+      {{- with .Values.webui.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      volumes:
+      - name: webui-volume
+        persistentVolumeClaim:
+          claimName: ollama-webui-pvc
\ No newline at end of file
diff --git a/kubernetes/helm/templates/webui-ingress.yaml b/kubernetes/helm/templates/webui-ingress.yaml
index 3970825d1..84f819f37 100644
--- a/kubernetes/helm/templates/webui-ingress.yaml
+++ b/kubernetes/helm/templates/webui-ingress.yaml
@@ -1,11 +1,13 @@
+{{- if .Values.webui.ingress.enabled }}
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   name: ollama-webui-ingress
   namespace: {{ .Values.namespace }}
-  #annotations:
-  #  Use appropriate annotations for your Ingress controller, e.g., for NGINX:
-  #  nginx.ingress.kubernetes.io/rewrite-target: /
+{{- if .Values.webui.ingress.annotations }}
+  annotations:
+{{ toYaml .Values.webui.ingress.annotations | trimSuffix "\n" | indent 4 }}
+{{- end }}
 spec:
   rules:
   - host: {{ .Values.webui.ingress.host }}
@@ -18,3 +20,4 @@ spec:
             name: ollama-webui-service
             port:
               number: {{ .Values.webui.servicePort }}
+{{- end }}
diff --git a/kubernetes/helm/templates/webui-pvc.yaml b/kubernetes/helm/templates/webui-pvc.yaml
new file mode 100644
index 000000000..e9961aa8d
--- /dev/null
+++ b/kubernetes/helm/templates/webui-pvc.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  labels:
+    app: ollama-webui
+  name: ollama-webui-pvc
+  namespace: {{ .Values.namespace }}
+spec:
+  accessModes: [ "ReadWriteOnce" ]
+  resources:
+    requests:
+      storage: {{ .Values.webui.volumeSize }}
\ No newline at end of file
diff --git a/kubernetes/helm/templates/webui-service.yaml b/kubernetes/helm/templates/webui-service.yaml
index dd6058558..7fefa4fd4 100644
--- a/kubernetes/helm/templates/webui-service.yaml
+++ b/kubernetes/helm/templates/webui-service.yaml
@@ -4,7 +4,7 @@ metadata:
   name: ollama-webui-service
   namespace: {{ .Values.namespace }}
 spec:
-  type: NodePort # Use LoadBalancer if you're on a cloud that supports it
+  type: {{ .Values.webui.service.type }} # Default: NodePort. Use LoadBalancer if you're on a cloud that supports it
   selector:
     app: ollama-webui
   ports:
diff --git a/kubernetes/helm/values.yaml b/kubernetes/helm/values.yaml
index bfdf15257..648b40509 100644
--- a/kubernetes/helm/values.yaml
+++ b/kubernetes/helm/values.yaml
@@ -10,6 +10,12 @@ ollama:
       memory: "2Gi"
       nvidia.com/gpu: "0"
   volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: ClusterIP
+  gpu:
+    enabled: false
 
 webui:
   replicaCount: 1
@@ -20,4 +26,13 @@ webui:
       cpu: "500m"
       memory: "500Mi"
   ingress:
+    enabled: true
+    annotations:
+      # Use appropriate annotations for your Ingress controller, e.g., for NGINX:
+      # nginx.ingress.kubernetes.io/rewrite-target: /
     host: ollama.minikube.local
+  volumeSize: 1Gi
+  nodeSelector: {}
+  tolerations: []
+  service:
+    type: NodePort
\ No newline at end of file
diff --git a/run-compose.sh b/run-compose.sh
index 7c7ceb714..0557bce95 100755
--- a/run-compose.sh
+++ b/run-compose.sh
@@ -80,12 +80,12 @@ usage() {
     echo "  -h, --help                 Show this help message."
     echo ""
     echo "Examples:"
-    echo "  ./$0 --drop"
-    echo "  ./$0 --enable-gpu[count=1]"
-    echo "  ./$0 --enable-api[port=11435]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
-    echo "  ./$0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
+    echo "  $0 --drop"
+    echo "  $0 --enable-gpu[count=1]"
+    echo "  $0 --enable-api[port=11435]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data]"
+    echo "  $0 --enable-gpu[count=1] --enable-api[port=12345] --webui[port=3000] --data[folder=./ollama-data] --build"
     echo ""
     echo "This script configures and runs a docker-compose setup with optional GPU support, API exposure, and web UI configuration."
     echo "About the gpu to use, the script automatically detects it using the "lspci" command."
@@ -234,4 +234,4 @@ else
     echo "Aborted."
 fi
 
-echo
\ No newline at end of file
+echo
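
For reference, a minimal sketch of how the new chart values in `kubernetes/helm/values.yaml` might be exercised at install time. This is illustrative only: the release name `ollama-webui` and the override file name `my-values.yaml` are assumptions, it presumes `kubernetes/helm` holds a complete chart (a `Chart.yaml` alongside the `values.yaml` shown above), and the namespace referenced by `{{ .Values.namespace }}` is expected to already exist.

```bash
# Chart defaults per values.yaml: ClusterIP for the Ollama service,
# NodePort for the web UI service, ingress enabled.
helm install ollama-webui ./kubernetes/helm

# Enable GPU scheduling: injects the NVIDIA env vars and adds the
# nvidia.com/gpu toleration to the Ollama pod.
helm install ollama-webui ./kubernetes/helm --set ollama.gpu.enabled=true

# Keep other overrides (ollama.volumeSize, webui.volumeSize,
# webui.service.type, webui.ingress.annotations, ...) in a file.
helm install ollama-webui ./kubernetes/helm -f my-values.yaml
```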