From b817b4b6ee52be17d3e2a141a64893276acf19a7 Mon Sep 17 00:00:00 2001 From: vishalkadam47 Date: Mon, 11 Nov 2024 23:18:24 +0530 Subject: [PATCH] refactor: gpu support and docker setup improvements - Add gpu status refresh with useEffect - Update docker-compose.yml configuration - Modify gpu setup scripts - Improve gpu support checks --- .../settings/servers/gpu-support.tsx | 15 +++++-- .../templates/blender/docker-compose.yml | 1 - packages/server/src/utils/gpu-setup.ts | 42 ++++++++++++++----- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx b/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx index e89a9b66..b398fe74 100644 --- a/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx +++ b/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx @@ -11,7 +11,7 @@ import { import { api } from "@/utils/api"; import { TRPCClientError } from "@trpc/client"; import { CheckCircle2, Cpu, Loader2, RefreshCw, XCircle } from "lucide-react"; -import { useState } from "react"; +import { useEffect, useState } from "react"; import { toast } from "sonner"; interface GPUSupportProps { @@ -54,9 +54,18 @@ export function GPUSupport({ serverId }: GPUSupportProps) { const handleRefresh = async () => { setIsRefreshing(true); - await refetch(); - setIsRefreshing(false); + try { + await utils.settings.checkGPUStatus.invalidate({ serverId }); + await refetch(); + } catch (error) { + toast.error("Failed to refresh GPU status"); + } finally { + setIsRefreshing(false); + } }; + useEffect(() => { + handleRefresh(); + }, []); const handleEnableGPU = async () => { if (serverId === undefined) { diff --git a/apps/dokploy/templates/blender/docker-compose.yml b/apps/dokploy/templates/blender/docker-compose.yml index da769c6b..893f3dee 100644 --- a/apps/dokploy/templates/blender/docker-compose.yml +++ b/apps/dokploy/templates/blender/docker-compose.yml @@ -3,7 +3,6 @@ version: "3.8" services: blender: image: lscr.io/linuxserver/blender:latest - container_name: blender runtime: nvidia deploy: resources: diff --git a/packages/server/src/utils/gpu-setup.ts b/packages/server/src/utils/gpu-setup.ts index bb366762..12d46dc1 100644 --- a/packages/server/src/utils/gpu-setup.ts +++ b/packages/server/src/utils/gpu-setup.ts @@ -87,20 +87,38 @@ const checkRuntime = async (serverId?: string) => { let runtimeConfigured = false; try { - const runtimeCommand = 'docker info --format "{{json .Runtimes}}"'; - const { stdout: runtimeInfo } = serverId - ? await execAsyncRemote(serverId, runtimeCommand) - : await execAsync(runtimeCommand); + // First check: Is nvidia-container-runtime installed? + const checkBinaryCommand = "command -v nvidia-container-runtime"; + try { + const { stdout } = serverId + ? await execAsyncRemote(serverId, checkBinaryCommand) + : await execAsync(checkBinaryCommand); + runtimeInstalled = !!stdout.trim(); + } catch (error) { + console.debug("Runtime binary check:", error); + } - const runtimes = JSON.parse(runtimeInfo); - runtimeInstalled = "nvidia" in runtimes; + // Second check: Is it configured in Docker? + try { + const runtimeCommand = 'docker info --format "{{json .Runtimes}}"'; + const { stdout: runtimeInfo } = serverId + ? await execAsyncRemote(serverId, runtimeCommand) + : await execAsync(runtimeCommand); - const defaultCommand = 'docker info --format "{{.DefaultRuntime}}"'; - const { stdout: defaultRuntime } = serverId - ? await execAsyncRemote(serverId, defaultCommand) - : await execAsync(defaultCommand); + const defaultCommand = 'docker info --format "{{.DefaultRuntime}}"'; + const { stdout: defaultRuntime } = serverId + ? await execAsyncRemote(serverId, defaultCommand) + : await execAsync(defaultCommand); - runtimeConfigured = defaultRuntime.trim() === "nvidia"; + const runtimes = JSON.parse(runtimeInfo); + const hasNvidiaRuntime = "nvidia" in runtimes; + const isDefaultRuntime = defaultRuntime.trim() === "nvidia"; + + // Only set runtimeConfigured if both conditions are met + runtimeConfigured = hasNvidiaRuntime && isDefaultRuntime; + } catch (error) { + console.debug("Runtime configuration check:", error); + } } catch (error) { console.debug("Runtime check:", error); } @@ -279,6 +297,7 @@ const setupRemoteServer = async (serverId: string, daemonConfig: any) => { "sudo -n true", `echo '${JSON.stringify(daemonConfig, null, 2)}' | sudo tee /etc/docker/daemon.json`, "sudo mkdir -p /etc/nvidia-container-runtime", + 'sudo sed -i "/swarm-resource/d" /etc/nvidia-container-runtime/config.toml', 'echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" | sudo tee -a /etc/nvidia-container-runtime/config.toml', "sudo systemctl daemon-reload", "sudo systemctl restart docker", @@ -295,6 +314,7 @@ const setupLocalServer = async (daemonConfig: any) => { `pkexec sh -c ' cp ${configFile} /etc/docker/daemon.json && mkdir -p /etc/nvidia-container-runtime && + sed -i "/swarm-resource/d" /etc/nvidia-container-runtime/config.toml && echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" >> /etc/nvidia-container-runtime/config.toml && systemctl daemon-reload && systemctl restart docker