diff --git a/apps/dokploy/components/dashboard/settings/servers/actions/show-dokploy-actions.tsx b/apps/dokploy/components/dashboard/settings/servers/actions/show-dokploy-actions.tsx index 49f6772b..9b12af84 100644 --- a/apps/dokploy/components/dashboard/settings/servers/actions/show-dokploy-actions.tsx +++ b/apps/dokploy/components/dashboard/settings/servers/actions/show-dokploy-actions.tsx @@ -13,6 +13,7 @@ import { import { api } from "@/utils/api"; import { toast } from "sonner"; import { ShowModalLogs } from "../../web-server/show-modal-logs"; +import { GPUSupportModal } from "../gpu-support-modal"; export const ShowDokployActions = () => { const { mutateAsync: reloadServer, isLoading } = @@ -45,6 +46,7 @@ export const ShowDokployActions = () => { Watch logs + diff --git a/apps/dokploy/components/dashboard/settings/servers/gpu-support-modal.tsx b/apps/dokploy/components/dashboard/settings/servers/gpu-support-modal.tsx new file mode 100644 index 00000000..9cf858cd --- /dev/null +++ b/apps/dokploy/components/dashboard/settings/servers/gpu-support-modal.tsx @@ -0,0 +1,36 @@ +import { + Dialog, + DialogContent, + DialogHeader, + DialogTitle, + DialogTrigger, +} from "@/components/ui/dialog"; +import { DropdownMenuItem } from "@/components/ui/dropdown-menu"; +import { useState } from "react"; +import { GPUSupport } from "./gpu-support"; + +export const GPUSupportModal = () => { + const [isOpen, setIsOpen] = useState(false); + + return ( + + + e.preventDefault()} + > + GPU Setup + + + + + + Dokploy Server GPU Setup + + + + + + + ); +}; diff --git a/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx b/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx new file mode 100644 index 00000000..b398fe74 --- /dev/null +++ b/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx @@ -0,0 +1,282 @@ +import { AlertBlock } from "@/components/shared/alert-block"; +import { DialogAction } from "@/components/shared/dialog-action"; +import 
{ Button } from "@/components/ui/button"; +import { + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, +} from "@/components/ui/card"; +import { api } from "@/utils/api"; +import { TRPCClientError } from "@trpc/client"; +import { CheckCircle2, Cpu, Loader2, RefreshCw, XCircle } from "lucide-react"; +import { useEffect, useState } from "react"; +import { toast } from "sonner"; + +interface GPUSupportProps { + serverId?: string; +} + +export function GPUSupport({ serverId }: GPUSupportProps) { + const [isLoading, setIsLoading] = useState(false); + const [isRefreshing, setIsRefreshing] = useState(false); + const utils = api.useContext(); + + const { + data: gpuStatus, + isLoading: isChecking, + refetch, + } = api.settings.checkGPUStatus.useQuery( + { serverId }, + { + enabled: serverId !== undefined, + }, + ); + + const setupGPU = api.settings.setupGPU.useMutation({ + onMutate: () => { + setIsLoading(true); + }, + onSuccess: async () => { + toast.success("GPU support enabled successfully"); + setIsLoading(false); + await utils.settings.checkGPUStatus.invalidate({ serverId }); + }, + onError: (error) => { + toast.error( + error.message || + "Failed to enable GPU support. Please check server logs.", + ); + setIsLoading(false); + }, + }); + + const handleRefresh = async () => { + setIsRefreshing(true); + try { + await utils.settings.checkGPUStatus.invalidate({ serverId }); + await refetch(); + } catch (error) { + toast.error("Failed to refresh GPU status"); + } finally { + setIsRefreshing(false); + } + }; + useEffect(() => { + handleRefresh(); + }, []); + + const handleEnableGPU = async () => { + if (serverId === undefined) { + toast.error("No server selected"); + return; + } + + try { + await setupGPU.mutateAsync({ serverId }); + } catch (error) { + // Error handling is done in mutation's onError + } + }; + + return ( + +
+ + +
+
+
+ + GPU Configuration +
+ + Configure and monitor GPU support + +
+
+ + + + +
+
+
+ + + +
System Requirements:
+
    +
  • NVIDIA GPU hardware must be physically installed
  • +
  • + NVIDIA drivers must be installed and running (check with + nvidia-smi) +
  • +
  • + NVIDIA Container Runtime must be installed + (nvidia-container-runtime) +
  • +
  • User must have sudo/administrative privileges
  • +
  • System must support CUDA for GPU acceleration
  • +
+
+ + {isChecking ? ( +
+ + Checking GPU status... +
+ ) : ( +
+ {/* Prerequisites Section */} +
+

Prerequisites

+

+ Shows all software checks and available hardware +

+
+ + + + + + +
+
+ + {/* Configuration Status */} +
+

+ Docker Swarm GPU Status +

+

+ Shows the configuration state that changes with the Enable + GPU +

+
+ + +
+
+
+ )} +
+
+
+
+ ); +} + +interface StatusRowProps { + label: string; + isEnabled?: boolean; + description?: string; + value?: string | number; + showIcon?: boolean; +} + +export function StatusRow({ + label, + isEnabled, + description, + value, + showIcon = true, +}: StatusRowProps) { + return ( +
+ {label} +
+ {showIcon ? ( + <> + {isEnabled ? ( + + ) : ( + + )} + + {description || (isEnabled ? "Installed" : "Not Installed")} + + + ) : ( + {value} + )} +
+
+ ); +} diff --git a/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx b/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx index 8bfcf4da..119d4d29 100644 --- a/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx +++ b/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx @@ -32,6 +32,7 @@ import Link from "next/link"; import { useState } from "react"; import { toast } from "sonner"; import { ShowDeployment } from "../../application/deployments/show-deployment"; +import { GPUSupport } from "./gpu-support"; interface Props { serverId: string; @@ -89,9 +90,10 @@ export const SetupServer = ({ serverId }: Props) => { ) : (
- + SSH Keys Deployments + GPU Setup {
+ +
+ +
+
)} diff --git a/apps/dokploy/public/templates/blender.svg b/apps/dokploy/public/templates/blender.svg new file mode 100644 index 00000000..e59079f5 --- /dev/null +++ b/apps/dokploy/public/templates/blender.svg @@ -0,0 +1,153 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/apps/dokploy/server/api/routers/settings.ts b/apps/dokploy/server/api/routers/settings.ts index e5c34b11..7c777b17 100644 --- a/apps/dokploy/server/api/routers/settings.ts +++ b/apps/dokploy/server/api/routers/settings.ts @@ -52,6 +52,7 @@ import { writeMainConfig, writeTraefikConfigInPath, } from "@dokploy/server"; +import { checkGPUStatus, setupGPUSupport } from "@dokploy/server"; import { generateOpenApiDocument } from "@dokploy/trpc-openapi"; import { TRPCError } from "@trpc/server"; import { sql } from "drizzle-orm"; @@ -657,6 +658,54 @@ export const settingsRouter = createTRPCRouter({ } return { status: "not_cloud" }; }), + setupGPU: adminProcedure + .input( + z.object({ + serverId: z.string().optional(), + }), + ) + .mutation(async ({ input }) => { + if (IS_CLOUD && !input.serverId) { + throw new Error("Select a server to enable the GPU Setup"); + } + + try { + await setupGPUSupport(input.serverId); + return { success: true }; + } catch (error) { + console.error("GPU Setup Error:", error); + throw error; + } + }), + checkGPUStatus: adminProcedure + .input( + z.object({ + serverId: z.string().optional(), + }), + ) + .query(async ({ input }) => { + if (IS_CLOUD && !input.serverId) { + return { + driverInstalled: false, + driverVersion: undefined, + gpuModel: undefined, + runtimeInstalled: false, + runtimeConfigured: false, + cudaSupport: undefined, + cudaVersion: undefined, + memoryInfo: undefined, + availableGPUs: 0, + swarmEnabled: false, + gpuResources: 0, + }; + } + + try { + return await checkGPUStatus(input.serverId || ""); + } catch (error) { + throw new Error("Failed to check GPU status"); + } + 
}), }); // { // "Parallelism": 1, diff --git a/apps/dokploy/server/api/trpc.ts b/apps/dokploy/server/api/trpc.ts index d37315c3..db4f7adf 100644 --- a/apps/dokploy/server/api/trpc.ts +++ b/apps/dokploy/server/api/trpc.ts @@ -21,7 +21,6 @@ import { import type { Session, User } from "lucia"; import superjson from "superjson"; import { ZodError } from "zod"; - /** * 1. CONTEXT * diff --git a/apps/dokploy/templates/blender/docker-compose.yml b/apps/dokploy/templates/blender/docker-compose.yml new file mode 100644 index 00000000..893f3dee --- /dev/null +++ b/apps/dokploy/templates/blender/docker-compose.yml @@ -0,0 +1,26 @@ +version: "3.8" + +services: + blender: + image: lscr.io/linuxserver/blender:latest + runtime: nvidia + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: + - gpu + environment: + - NVIDIA_VISIBLE_DEVICES=all + - NVIDIA_DRIVER_CAPABILITIES=all + - PUID=1000 + - PGID=1000 + - TZ=Etc/UTC + - SUBFOLDER=/ #optional + ports: + - 3000 + - 3001 + restart: unless-stopped + shm_size: 1gb diff --git a/apps/dokploy/templates/blender/index.ts b/apps/dokploy/templates/blender/index.ts new file mode 100644 index 00000000..84e52755 --- /dev/null +++ b/apps/dokploy/templates/blender/index.ts @@ -0,0 +1,34 @@ +import { + type DomainSchema, + type Schema, + type Template, + generateHash, + generateRandomDomain, +} from "../utils"; + +export function generate(schema: Schema): Template { + const mainServiceHash = generateHash(schema.projectName); + const mainDomain = generateRandomDomain(schema); + + const domains: DomainSchema[] = [ + { + host: mainDomain, + port: 3000, + serviceName: "blender", + }, + ]; + + const envs = [ + "PUID=1000", + "PGID=1000", + "TZ=Etc/UTC", + "SUBFOLDER=/", + "NVIDIA_VISIBLE_DEVICES=all", + "NVIDIA_DRIVER_CAPABILITIES=all", + ]; + + return { + envs, + domains, + }; +} diff --git a/apps/dokploy/templates/templates.ts b/apps/dokploy/templates/templates.ts index 37aeb1e1..322eb8e2 100644 --- 
a/apps/dokploy/templates/templates.ts +++ b/apps/dokploy/templates/templates.ts @@ -716,5 +716,20 @@ export const templates: TemplateData[] = [ }, tags: ["file", "sync"], load: () => import("./nextcloud-aio/index").then((m) => m.generate), + }, + { + id: "blender", + name: "Blender", + version: "latest", + description: + "Blender is a free and open-source 3D creation suite. It supports the entire 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, video editing and 2D animation pipeline.", + logo: "blender.svg", + links: { + github: "https://github.com/linuxserver/docker-blender", + website: "https://www.blender.org/", + docs: "https://docs.blender.org/", + }, + tags: ["3d", "rendering", "animation"], + load: () => import("./blender/index").then((m) => m.generate), }, ]; diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index abddc405..12f3b64e 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -116,3 +116,4 @@ export * from "./monitoring/utilts"; export * from "./db/validations/domain"; export * from "./db/validations/index"; +export * from "./utils/gpu-setup"; diff --git a/packages/server/src/utils/gpu-setup.ts b/packages/server/src/utils/gpu-setup.ts new file mode 100644 index 00000000..ce60adf1 --- /dev/null +++ b/packages/server/src/utils/gpu-setup.ts @@ -0,0 +1,349 @@ +import * as fs from "node:fs/promises"; +import { execAsync, sleep } from "../utils/process/execAsync"; +import { execAsyncRemote } from "../utils/process/execAsync"; + +interface GPUInfo { + driverInstalled: boolean; + driverVersion?: string; + gpuModel?: string; + runtimeInstalled: boolean; + runtimeConfigured: boolean; + cudaSupport: boolean; + cudaVersion?: string; + memoryInfo?: string; + availableGPUs: number; + swarmEnabled: boolean; + gpuResources: number; +} + +export async function checkGPUStatus(serverId?: string): Promise { + try { + const [driverInfo, runtimeInfo, swarmInfo, gpuInfo, 
cudaInfo] = + await Promise.all([ + checkGpuDriver(serverId), + checkRuntime(serverId), + checkSwarmResources(serverId), + checkGpuInfo(serverId), + checkCudaSupport(serverId), + ]); + + return { + ...driverInfo, + ...runtimeInfo, + ...swarmInfo, + ...gpuInfo, + ...cudaInfo, + }; + } catch (error) { + console.error("Error in checkGPUStatus:", error); + return { + driverInstalled: false, + driverVersion: undefined, + runtimeInstalled: false, + runtimeConfigured: false, + cudaSupport: false, + cudaVersion: undefined, + gpuModel: undefined, + memoryInfo: undefined, + availableGPUs: 0, + swarmEnabled: false, + gpuResources: 0, + }; + } +} + +const checkGpuDriver = async (serverId?: string) => { + let driverVersion: string | undefined; + let driverInstalled = false; + let availableGPUs = 0; + + try { + const driverCommand = + "nvidia-smi --query-gpu=driver_version --format=csv,noheader"; + const { stdout: nvidiaSmi } = serverId + ? await execAsyncRemote(serverId, driverCommand) + : await execAsync(driverCommand); + + driverVersion = nvidiaSmi.trim(); + if (driverVersion) { + driverInstalled = true; + const countCommand = + "nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l"; + const { stdout: gpuCount } = serverId + ? await execAsyncRemote(serverId, countCommand) + : await execAsync(countCommand); + + availableGPUs = Number.parseInt(gpuCount.trim(), 10); + } + } catch (error) { + console.debug("GPU driver check:", error); + } + + return { driverVersion, driverInstalled, availableGPUs }; +}; + +const checkRuntime = async (serverId?: string) => { + let runtimeInstalled = false; + let runtimeConfigured = false; + + try { + // First check: Is nvidia-container-runtime installed? + const checkBinaryCommand = "command -v nvidia-container-runtime"; + try { + const { stdout } = serverId + ? 
await execAsyncRemote(serverId, checkBinaryCommand) + : await execAsync(checkBinaryCommand); + runtimeInstalled = !!stdout.trim(); + } catch (error) { + console.debug("Runtime binary check:", error); + } + + // Second check: Is it configured in Docker? + try { + const runtimeCommand = 'docker info --format "{{json .Runtimes}}"'; + const { stdout: runtimeInfo } = serverId + ? await execAsyncRemote(serverId, runtimeCommand) + : await execAsync(runtimeCommand); + + const defaultCommand = 'docker info --format "{{.DefaultRuntime}}"'; + const { stdout: defaultRuntime } = serverId + ? await execAsyncRemote(serverId, defaultCommand) + : await execAsync(defaultCommand); + + const runtimes = JSON.parse(runtimeInfo); + const hasNvidiaRuntime = "nvidia" in runtimes; + const isDefaultRuntime = defaultRuntime.trim() === "nvidia"; + + // Only set runtimeConfigured if both conditions are met + runtimeConfigured = hasNvidiaRuntime && isDefaultRuntime; + } catch (error) { + console.debug("Runtime configuration check:", error); + } + } catch (error) { + console.debug("Runtime check:", error); + } + + return { runtimeInstalled, runtimeConfigured }; +}; + +const checkSwarmResources = async (serverId?: string) => { + let swarmEnabled = false; + let gpuResources = 0; + + try { + const nodeCommand = + "docker node inspect self --format '{{json .Description.Resources.GenericResources}}'"; + const { stdout: resources } = serverId + ? 
await execAsyncRemote(serverId, nodeCommand) + : await execAsync(nodeCommand); + + if (resources && resources !== "null") { + const genericResources = JSON.parse(resources); + for (const resource of genericResources) { + if ( + resource.DiscreteResourceSpec && + (resource.DiscreteResourceSpec.Kind === "GPU" || + resource.DiscreteResourceSpec.Kind === "gpu") + ) { + gpuResources = resource.DiscreteResourceSpec.Value; + swarmEnabled = true; + break; + } + } + } + } catch (error) { + console.debug("Swarm resource check:", error); + } + + return { swarmEnabled, gpuResources }; +}; + +const checkGpuInfo = async (serverId?: string) => { + let gpuModel: string | undefined; + let memoryInfo: string | undefined; + + try { + const gpuInfoCommand = + "nvidia-smi --query-gpu=gpu_name,memory.total --format=csv,noheader"; + const { stdout: gpuInfo } = serverId + ? await execAsyncRemote(serverId, gpuInfoCommand) + : await execAsync(gpuInfoCommand); + + [gpuModel, memoryInfo] = gpuInfo.split(",").map((s) => s.trim()); + } catch (error) { + console.debug("GPU info check:", error); + } + + return { gpuModel, memoryInfo }; +}; + +const checkCudaSupport = async (serverId?: string) => { + let cudaVersion: string | undefined; + let cudaSupport = false; + + try { + const cudaCommand = 'nvidia-smi -q | grep "CUDA Version"'; + const { stdout: cudaInfo } = serverId + ? await execAsyncRemote(serverId, cudaCommand) + : await execAsync(cudaCommand); + + const cudaMatch = cudaInfo.match(/CUDA Version\s*:\s*([\d\.]+)/); + cudaVersion = cudaMatch ? cudaMatch[1] : undefined; + cudaSupport = !!cudaVersion; + } catch (error) { + console.debug("CUDA support check:", error); + } + + return { cudaVersion, cudaSupport }; +}; + +export async function setupGPUSupport(serverId?: string): Promise { + try { + // 1. 
Initial status check and validation + const initialStatus = await checkGPUStatus(serverId); + const shouldContinue = await validatePrerequisites(initialStatus); + if (!shouldContinue) return; + + // 2. Get node ID + const nodeId = await getNodeId(serverId); + + // 3. Create daemon configuration + const daemonConfig = createDaemonConfig(initialStatus.availableGPUs); + + // 4. Setup server based on environment + if (serverId) { + await setupRemoteServer(serverId, daemonConfig); + } else { + await setupLocalServer(daemonConfig); + } + + // 5. Wait for Docker restart + await sleep(10000); + + // 6. Add GPU label + await addGpuLabel(nodeId, serverId); + + // 7. Final verification + await sleep(5000); + await verifySetup(nodeId, serverId); + } catch (error) { + if ( + error instanceof Error && + error.message.includes("password is required") + ) { + throw new Error( + "Sudo access required. Please run with appropriate permissions.", + ); + } + throw error; + } +} + +const validatePrerequisites = async (initialStatus: GPUInfo) => { + if (!initialStatus.driverInstalled) { + throw new Error( + "NVIDIA drivers not installed. Please install appropriate NVIDIA drivers first.", + ); + } + + if (!initialStatus.runtimeInstalled) { + throw new Error( + "NVIDIA Container Runtime not installed. Please install nvidia-container-runtime first.", + ); + } + + if (initialStatus.swarmEnabled && initialStatus.runtimeConfigured) { + return false; + } + + return true; +}; + +const getNodeId = async (serverId?: string) => { + const nodeIdCommand = 'docker info --format "{{.Swarm.NodeID}}"'; + const { stdout: nodeId } = serverId + ? 
await execAsyncRemote(serverId, nodeIdCommand) + : await execAsync(nodeIdCommand); + + const trimmedNodeId = nodeId.trim(); + if (!trimmedNodeId) { + throw new Error("Setup Server before enabling GPU support"); + } + + return trimmedNodeId; +}; + +const createDaemonConfig = (availableGPUs: number) => ({ + runtimes: { + nvidia: { + path: "nvidia-container-runtime", + runtimeArgs: [], + }, + }, + "default-runtime": "nvidia", + "node-generic-resources": [`GPU=${availableGPUs}`], +}); + +const setupRemoteServer = async (serverId: string, daemonConfig: any) => { + const setupCommands = [ + "sudo -n true", + `echo '${JSON.stringify(daemonConfig, null, 2)}' | sudo tee /etc/docker/daemon.json`, + "sudo mkdir -p /etc/nvidia-container-runtime", + 'sudo sed -i "/swarm-resource/d" /etc/nvidia-container-runtime/config.toml', + 'echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" | sudo tee -a /etc/nvidia-container-runtime/config.toml', + "sudo systemctl daemon-reload", + "sudo systemctl restart docker", + ].join(" && "); + + await execAsyncRemote(serverId, setupCommands); +}; + +const setupLocalServer = async (daemonConfig: any) => { + const configFile = `/tmp/docker-daemon-${Date.now()}.json`; + await fs.writeFile(configFile, JSON.stringify(daemonConfig, null, 2)); + + const setupCommands = [ + `pkexec sh -c ' + cp ${configFile} /etc/docker/daemon.json && + mkdir -p /etc/nvidia-container-runtime && + sed -i "/swarm-resource/d" /etc/nvidia-container-runtime/config.toml && + echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" >> /etc/nvidia-container-runtime/config.toml && + systemctl daemon-reload && + systemctl restart docker + '`, + `rm ${configFile}`, + ].join(" && "); + + await execAsync(setupCommands); +}; + +const addGpuLabel = async (nodeId: string, serverId?: string) => { + const labelCommand = `docker node update --label-add gpu=true ${nodeId}`; + if (serverId) { + await execAsyncRemote(serverId, labelCommand); + } else { + await execAsync(labelCommand); + } +}; + +const 
verifySetup = async (nodeId: string, serverId?: string) => { + const finalStatus = await checkGPUStatus(serverId); + + if (!finalStatus.swarmEnabled) { + const diagnosticCommands = [ + `docker node inspect ${nodeId}`, + 'nvidia-smi -a | grep "GPU UUID"', + "cat /etc/docker/daemon.json", + "cat /etc/nvidia-container-runtime/config.toml", + ].join(" && "); + + const { stdout: diagnostics } = serverId + ? await execAsyncRemote(serverId, diagnosticCommands) + : await execAsync(diagnosticCommands); + + console.error("Diagnostic Information:", diagnostics); + throw new Error("GPU support not detected in swarm after setup"); + } + + return finalStatus; +};