From 15a76d263982cb1dec8c90d01848008209216ce8 Mon Sep 17 00:00:00 2001 From: vishalkadam47 Date: Thu, 17 Oct 2024 15:45:27 +0530 Subject: [PATCH 01/58] feat: Add server-level GPU support for Docker Swarm deployments and API endpoint for setup --- apps/dokploy/server/api/trpc.ts | 8 ++++++++ packages/server/src/constants/index.ts | 3 +++ packages/server/src/utils/gpu-setup.ts | 9 +++++++++ 3 files changed, 20 insertions(+) create mode 100644 packages/server/src/utils/gpu-setup.ts diff --git a/apps/dokploy/server/api/trpc.ts b/apps/dokploy/server/api/trpc.ts index d37315c3..8aec99ec 100644 --- a/apps/dokploy/server/api/trpc.ts +++ b/apps/dokploy/server/api/trpc.ts @@ -21,6 +21,7 @@ import { import type { Session, User } from "lucia"; import superjson from "superjson"; import { ZodError } from "zod"; +import { setupGPUSupport } from '@dokploy/server/src/utils/gpu-setup'; /** * 1. CONTEXT @@ -208,3 +209,10 @@ export const adminProcedure = t.procedure.use(({ ctx, next }) => { }, }); }); + +const appRouter = t.router({ + setupGPU: t.procedure.mutation(async () => { + await setupGPUSupport(); + return { success: true }; + }), + }); \ No newline at end of file diff --git a/packages/server/src/constants/index.ts b/packages/server/src/constants/index.ts index f2f1a4d8..fd89a53d 100644 --- a/packages/server/src/constants/index.ts +++ b/packages/server/src/constants/index.ts @@ -37,3 +37,6 @@ export const paths = (isServer = false) => { REGISTRY_PATH: `${BASE_PATH}/registry`, }; }; + +export const GPU_ENABLED = process.env.GPU_ENABLED === 'true'; +export const GPU_RESOURCE_NAME = 'DOCKER_RESOURCE_GPU'; \ No newline at end of file diff --git a/packages/server/src/utils/gpu-setup.ts b/packages/server/src/utils/gpu-setup.ts new file mode 100644 index 00000000..459c3395 --- /dev/null +++ b/packages/server/src/utils/gpu-setup.ts @@ -0,0 +1,9 @@ +import { docker } from '../constants'; + +export async function setupGPUSupport() { + await docker.swarmUpdate({ + TaskDefaults: { + GenericResources: [{ DiscreteResourceSpec: { Kind: 'gpu', Value: 1 } }] + } + }); +} \ No newline at end of file From e52a0fc9d4785a881c974ab6d0d7ec35967f214e Mon Sep 17 00:00:00 2001 From: vishalkadam47 Date: Fri, 18 Oct 2024 04:55:37 +0530 Subject: [PATCH 02/58] feat: Added Blender template --- apps/dokploy/public/templates/blender.svg | 153 ++++++++++++++++++ .../templates/blender/docker-compose.yml | 37 +++++ apps/dokploy/templates/blender/index.ts | 34 ++++ apps/dokploy/templates/templates.ts | 14 ++ 4 files changed, 238 insertions(+) create mode 100644 apps/dokploy/public/templates/blender.svg create mode 100644 apps/dokploy/templates/blender/docker-compose.yml create mode 100644 apps/dokploy/templates/blender/index.ts diff --git a/apps/dokploy/public/templates/blender.svg b/apps/dokploy/public/templates/blender.svg new file mode 100644 index 00000000..e59079f5 --- /dev/null +++ b/apps/dokploy/public/templates/blender.svg @@ -0,0 +1,153 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/apps/dokploy/templates/blender/docker-compose.yml b/apps/dokploy/templates/blender/docker-compose.yml new file mode 100644 index 00000000..bc3de4b7 --- /dev/null +++ b/apps/dokploy/templates/blender/docker-compose.yml @@ -0,0 +1,37 @@ +version: "3.8" + +services: + blender: + image: lscr.io/linuxserver/blender:latest + privileged: true + container_name: blender + security_opt: + - seccomp:unconfined #optional + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: + - compute + - video + - graphics + - utility + environment: + - NVIDIA_VISIBLE_DEVICES=all + - NVIDIA_DRIVER_CAPABILITIES=all + - PUID=1000 + - PGID=1000 + - TZ=Etc/UTC + - SUBFOLDER=/ #optional + volumes: + - blender:/config + ports: + - 3000:3000 + - 3001:3001 + restart: unless-stopped + shm_size: 1gb + +volumes: + blender: null diff --git a/apps/dokploy/templates/blender/index.ts b/apps/dokploy/templates/blender/index.ts new file mode 100644 index 00000000..088e6fcc --- /dev/null +++ b/apps/dokploy/templates/blender/index.ts @@ -0,0 +1,34 @@ +import { + generateHash, + generateRandomDomain, + type Template, + type Schema, + type DomainSchema, +} from "../utils"; + +export function generate(schema: Schema): Template { + const mainServiceHash = generateHash(schema.projectName); + const mainDomain = generateRandomDomain(schema); + + const domains: DomainSchema[] = [ + { + host: mainDomain, + port: 3000, + serviceName: "blender", + }, + ]; + + const envs = [ + `PUID=1000`, + `PGID=1000`, + `TZ=Etc/UTC`, + `SUBFOLDER=/`, + `NVIDIA_VISIBLE_DEVICES=all`, + `NVIDIA_DRIVER_CAPABILITIES=all`, + ]; + + return { + envs, + domains, + }; +} diff --git a/apps/dokploy/templates/templates.ts b/apps/dokploy/templates/templates.ts index afe9d1b6..e5acb390 100644 --- a/apps/dokploy/templates/templates.ts +++ b/apps/dokploy/templates/templates.ts @@ -512,4 +512,18 @@ export const templates: TemplateData[] = [ tags: ["self-hosted", "email", "webmail"], load: () => import("./roundcube/index").then((m) => m.generate), }, + { + id: "blender", + name: "Blender", + version: "latest", + description: "Blender is a free and open-source 3D creation suite. It supports the entire 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, video editing and 2D animation pipeline.", + logo: "blender.svg", + links: { + github: "https://github.com/linuxserver/docker-blender", + website: "https://www.blender.org/", + docs: "https://docs.blender.org/", + }, + tags: ["3d", "rendering", "animation"], + load: () => import("./blender/index").then((m) => m.generate), + }, ]; From 5a440d934d5ac1d5aae8940a3fb507ae761dacbb Mon Sep 17 00:00:00 2001 From: vishalkadam47 Date: Fri, 25 Oct 2024 02:32:50 +0530 Subject: [PATCH 03/58] fix: Remove privileged mode and seccomp option, update runtime to nvidia --- apps/dokploy/templates/blender/docker-compose.yml | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/apps/dokploy/templates/blender/docker-compose.yml b/apps/dokploy/templates/blender/docker-compose.yml index bc3de4b7..90fa8da8 100644 --- a/apps/dokploy/templates/blender/docker-compose.yml +++ b/apps/dokploy/templates/blender/docker-compose.yml @@ -3,10 +3,8 @@ version: "3.8" services: blender: image: lscr.io/linuxserver/blender:latest - privileged: true container_name: blender - security_opt: - - seccomp:unconfined #optional + runtime: nvidia deploy: resources: reservations: @@ -14,10 +12,7 @@ services: - driver: nvidia count: all capabilities: - - compute - - video - - graphics - - utility + - gpu environment: - NVIDIA_VISIBLE_DEVICES=all - NVIDIA_DRIVER_CAPABILITIES=all @@ -25,13 +20,8 @@ services: - PGID=1000 - TZ=Etc/UTC - SUBFOLDER=/ #optional - volumes: - - blender:/config ports: - 3000:3000 - 3001:3001 restart: unless-stopped shm_size: 1gb - -volumes: - blender: null From 3e467959c9232b332dbbaaafae66d8c0cd76097b Mon Sep 17 00:00:00 2001 From: vishalkadam47 Date: Sun, 27 Oct 2024 22:00:08 +0530 Subject: [PATCH 04/58] refactor: Update docker-compose.yml to remove port mapping and remove GPU constants from index.ts --- apps/dokploy/templates/blender/docker-compose.yml | 4 ++-- packages/server/src/constants/index.ts | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/apps/dokploy/templates/blender/docker-compose.yml b/apps/dokploy/templates/blender/docker-compose.yml index 90fa8da8..da769c6b 100644 --- a/apps/dokploy/templates/blender/docker-compose.yml +++ b/apps/dokploy/templates/blender/docker-compose.yml @@ -21,7 +21,7 @@ services: - TZ=Etc/UTC - SUBFOLDER=/ #optional ports: - - 3000:3000 - - 3001:3001 + - 3000 + - 3001 restart: unless-stopped shm_size: 1gb diff --git a/packages/server/src/constants/index.ts b/packages/server/src/constants/index.ts index fd89a53d..be2a72de 100644 --- a/packages/server/src/constants/index.ts +++ b/packages/server/src/constants/index.ts @@ -36,7 +36,4 @@ export const paths = (isServer = false) => { MONITORING_PATH: `${BASE_PATH}/monitoring`, REGISTRY_PATH: `${BASE_PATH}/registry`, }; -}; - -export const GPU_ENABLED = process.env.GPU_ENABLED === 'true'; -export const GPU_RESOURCE_NAME = 'DOCKER_RESOURCE_GPU'; \ No newline at end of file +}; \ No newline at end of file From 1b6d8d803b34482ab56c692034ace63a4fb15d80 Mon Sep 17 00:00:00 2001 From: vishalkadam47 Date: Sat, 2 Nov 2024 15:15:58 +0530 Subject: [PATCH 05/58] feat: Added GPU support feature for Remote Server with setup and status checks, including API endpoints and utility functions --- .../settings/servers/setup-server.tsx | 12 +- apps/dokploy/server/api/routers/settings.ts | 60 ++++ apps/dokploy/server/api/trpc.ts | 9 - apps/dokploy/templates/blender/index.ts | 52 ++-- apps/dokploy/templates/templates.ts | 3 +- packages/server/src/constants/index.ts | 2 +- packages/server/src/index.ts | 1 + packages/server/src/utils/gpu-setup.ts | 268 +++++++++++++++++- 8 files changed, 361 insertions(+), 46 deletions(-) diff --git a/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx b/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx index 8bfcf4da..119d4d29 100644 --- a/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx +++ b/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx @@ -32,6 +32,7 @@ import Link from "next/link"; import { useState } from "react"; import { toast } from "sonner"; import { ShowDeployment } from "../../application/deployments/show-deployment"; +import { GPUSupport } from "./gpu-support"; interface Props { serverId: string; @@ -89,9 +90,10 @@ export const SetupServer = ({ serverId }: Props) => { ) : (
- + SSH Keys Deployments + GPU Setup {
+ +
+ +
+
)} diff --git a/apps/dokploy/server/api/routers/settings.ts b/apps/dokploy/server/api/routers/settings.ts index e1e63579..4a000889 100644 --- a/apps/dokploy/server/api/routers/settings.ts +++ b/apps/dokploy/server/api/routers/settings.ts @@ -52,6 +52,10 @@ import { writeMainConfig, writeTraefikConfigInPath, } from "@dokploy/server"; +import { + checkGPUStatus, + setupGPUSupport, +} from "@dokploy/server/src/utils/gpu-setup"; import { generateOpenApiDocument } from "@dokploy/trpc-openapi"; import { TRPCError } from "@trpc/server"; import { sql } from "drizzle-orm"; @@ -650,6 +654,62 @@ export const settingsRouter = createTRPCRouter({ } return { status: "not_cloud" }; }), + setupGPU: adminProcedure + .input( + z.object({ + serverId: z.string(), + }), + ) + .mutation(async ({ input }) => { + try { + if (IS_CLOUD) { + return { success: true }; + } + + if (!input.serverId) { + throw new TRPCError({ + code: "BAD_REQUEST", + message: "Server ID is required", + }); + } + + await setupGPUSupport(input.serverId); + return { success: true }; + } catch (error) { + throw new TRPCError({ + code: "INTERNAL_SERVER_ERROR", + message: + error instanceof Error + ? error.message + : "Failed to enable GPU support", + cause: error, + }); + } + }), + checkGPUStatus: adminProcedure + .input( + z.object({ + serverId: z.string().optional(), + }), + ) + .query(async ({ input }) => { + if (IS_CLOUD) { + return { + driverInstalled: false, + driverVersion: undefined, + gpuModel: undefined, + runtimeInstalled: false, + runtimeConfigured: false, + cudaSupport: undefined, + cudaVersion: undefined, + memoryInfo: undefined, + availableGPUs: 0, + swarmEnabled: false, + gpuResources: 0, + }; + } + return await checkGPUStatus(input.serverId); + }), }); // { // "Parallelism": 1, diff --git a/apps/dokploy/server/api/trpc.ts b/apps/dokploy/server/api/trpc.ts index 8aec99ec..db4f7adf 100644 --- a/apps/dokploy/server/api/trpc.ts +++ b/apps/dokploy/server/api/trpc.ts @@ -21,8 +21,6 @@ import { import type { Session, User } from "lucia"; import superjson from "superjson"; import { ZodError } from "zod"; -import { setupGPUSupport } from '@dokploy/server/src/utils/gpu-setup'; - /** * 1. CONTEXT * @@ -209,10 +207,3 @@ export const adminProcedure = t.procedure.use(({ ctx, next }) => { }, }); }); - -const appRouter = t.router({ - setupGPU: t.procedure.mutation(async () => { - await setupGPUSupport(); - return { success: true }; - }), - }); \ No newline at end of file diff --git a/apps/dokploy/templates/blender/index.ts b/apps/dokploy/templates/blender/index.ts index 088e6fcc..baf243e0 100644 --- a/apps/dokploy/templates/blender/index.ts +++ b/apps/dokploy/templates/blender/index.ts @@ -1,34 +1,34 @@ import { - generateHash, - generateRandomDomain, - type Template, - type Schema, - type DomainSchema, + type DomainSchema, + type Schema, + type Template, + generateHash, + generateRandomDomain, } from "../utils"; export function generate(schema: Schema): Template { - const mainServiceHash = generateHash(schema.projectName); - const mainDomain = generateRandomDomain(schema); + const mainServiceHash = generateHash(schema.projectName); + const mainDomain = generateRandomDomain(schema); - const domains: DomainSchema[] = [ - { - host: mainDomain, - port: 3000, - serviceName: "blender", - }, - ]; + const domains: DomainSchema[] = [ + { + host: mainDomain, + port: 3000, + serviceName: "blender", + }, + ]; - const envs = [ - `PUID=1000`, - `PGID=1000`, - `TZ=Etc/UTC`, - `SUBFOLDER=/`, - `NVIDIA_VISIBLE_DEVICES=all`, - `NVIDIA_DRIVER_CAPABILITIES=all`, - ]; + const envs = [ + `PUID=1000`, + `PGID=1000`, + `TZ=Etc/UTC`, + `SUBFOLDER=/`, + `NVIDIA_VISIBLE_DEVICES=all`, + `NVIDIA_DRIVER_CAPABILITIES=all`, + ]; - return { - envs, - domains, - }; + return { + envs, + domains, + }; } diff --git a/apps/dokploy/templates/templates.ts b/apps/dokploy/templates/templates.ts index 40d493e5..115a1ecf 100644 --- a/apps/dokploy/templates/templates.ts +++ b/apps/dokploy/templates/templates.ts @@ -516,7 +516,8 @@ export const templates: TemplateData[] = [ id: "blender", name: "Blender", version: "latest", - description: "Blender is a free and open-source 3D creation suite. It supports the entire 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, video editing and 2D animation pipeline.", + description: + "Blender is a free and open-source 3D creation suite. It supports the entire 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, video editing and 2D animation pipeline.", logo: "blender.svg", links: { github: "https://github.com/linuxserver/docker-blender", diff --git a/packages/server/src/constants/index.ts b/packages/server/src/constants/index.ts index be2a72de..f2f1a4d8 100644 --- a/packages/server/src/constants/index.ts +++ b/packages/server/src/constants/index.ts @@ -36,4 +36,4 @@ export const paths = (isServer = false) => { MONITORING_PATH: `${BASE_PATH}/monitoring`, REGISTRY_PATH: `${BASE_PATH}/registry`, }; -}; \ No newline at end of file +}; diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts index 06f2bc87..90daec2d 100644 --- a/packages/server/src/index.ts +++ b/packages/server/src/index.ts @@ -118,3 +118,4 @@ export * from "./monitoring/utilts"; export * from "./db/validations/domain"; export * from "./db/validations/index"; +export * from "./utils/gpu-setup"; diff --git a/packages/server/src/utils/gpu-setup.ts b/packages/server/src/utils/gpu-setup.ts index 459c3395..71f3bf0f 100644 --- a/packages/server/src/utils/gpu-setup.ts +++ b/packages/server/src/utils/gpu-setup.ts @@ -1,9 +1,261 @@ -import { docker } from '../constants'; +import { docker } from "../constants"; +import { execAsync } from "../utils/process/execAsync"; +import { execAsyncRemote } from "../utils/process/execAsync"; +import { getRemoteDocker } from "./servers/remote-docker"; -export async function setupGPUSupport() { - await docker.swarmUpdate({ - TaskDefaults: { - GenericResources: [{ DiscreteResourceSpec: { Kind: 'gpu', Value: 1 } }] - } - }); -} \ No newline at end of file +interface GPUInfo { + driverInstalled: boolean; + driverVersion?: string; + gpuModel?: string; + runtimeInstalled: boolean; + runtimeConfigured: boolean; + cudaSupport: boolean; + cudaVersion?: string; + memoryInfo?: string; + availableGPUs: number; + swarmEnabled: boolean; + gpuResources: number; +} + +interface DiscreteResourceSpec { + Kind: string; + Value: number; +} + +interface NamedGenericResource { + NamedResourceSpec?: { Kind: string; Value: string }; + DiscreteResourceSpec?: DiscreteResourceSpec; +} + +export async function checkGPUStatus(serverId?: string): Promise { + try { + // Check NVIDIA Driver + let driverInstalled = false; + let driverVersion: string | undefined; + let availableGPUs = 0; + + try { + const driverCommand = + "nvidia-smi --query-gpu=driver_version --format=csv,noheader"; + const { stdout: nvidiaSmi } = serverId + ? await execAsyncRemote(serverId, driverCommand) + : await execAsync(driverCommand); + + driverVersion = nvidiaSmi.trim(); + if (driverVersion) { + driverInstalled = true; + const countCommand = + "nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l"; + const { stdout: gpuCount } = serverId + ? await execAsyncRemote(serverId, countCommand) + : await execAsync(countCommand); + + availableGPUs = Number.parseInt(gpuCount.trim(), 10); + } + } catch (error) { + console.debug("GPU driver check:", error); + } + + // Check Runtime Configuration + let runtimeInstalled = false; + let runtimeConfigured = false; + try { + const runtimeCommand = 'docker info --format "{{json .Runtimes}}"'; + const { stdout: runtimeInfo } = serverId + ? await execAsyncRemote(serverId, runtimeCommand) + : await execAsync(runtimeCommand); + + const runtimes = JSON.parse(runtimeInfo); + runtimeInstalled = "nvidia" in runtimes; + + // Check if it's the default runtime + const defaultCommand = 'docker info --format "{{.DefaultRuntime}}"'; + const { stdout: defaultRuntime } = serverId + ? await execAsyncRemote(serverId, defaultCommand) + : await execAsync(defaultCommand); + + runtimeConfigured = defaultRuntime.trim() === "nvidia"; + } catch (error) { + console.debug("Runtime check:", error); + } + + // Check Swarm GPU Resources + let swarmEnabled = false; + let gpuResources = 0; + + try { + // Check node resources directly from inspect + const nodeCommand = + "docker node inspect self --format '{{json .Description.Resources.GenericResources}}'"; + const { stdout: resources } = serverId + ? await execAsyncRemote(serverId, nodeCommand) + : await execAsync(nodeCommand); + + if (resources && resources !== "null") { + const genericResources = JSON.parse(resources); + for (const resource of genericResources) { + if ( + resource.DiscreteResourceSpec && + (resource.DiscreteResourceSpec.Kind === "GPU" || + resource.DiscreteResourceSpec.Kind === "gpu") + ) { + gpuResources = resource.DiscreteResourceSpec.Value; + swarmEnabled = true; + break; + } + } + } + } catch (error) { + console.debug("Swarm resource check:", error); + } + + // Get GPU Model and Memory Info + const gpuInfoCommand = + "nvidia-smi --query-gpu=gpu_name,memory.total --format=csv,noheader"; + const { stdout: gpuInfo } = serverId + ? await execAsyncRemote(serverId, gpuInfoCommand) + : await execAsync(gpuInfoCommand); + + const [gpuModel, memoryTotal] = gpuInfo.split(",").map((s) => s.trim()); + + // Check CUDA Support + const cudaCommand = 'nvidia-smi -q | grep "CUDA Version"'; + const { stdout: cudaInfo } = serverId + ? await execAsyncRemote(serverId, cudaCommand) + : await execAsync(cudaCommand); + + const cudaMatch = cudaInfo.match(/CUDA Version\s*:\s*([\d\.]+)/); + const cudaVersion = cudaMatch ? cudaMatch[1] : undefined; + const cudaSupport = !!cudaVersion; + + return { + driverInstalled, + driverVersion, + runtimeInstalled, + runtimeConfigured, + availableGPUs, + swarmEnabled, + gpuResources, + gpuModel, + memoryInfo: memoryTotal, + cudaSupport, + cudaVersion, + }; + } catch (error) { + console.error("Error in checkGPUStatus:", error); + return { + driverInstalled: false, + driverVersion: undefined, + runtimeInstalled: false, + runtimeConfigured: false, + cudaSupport: false, + cudaVersion: undefined, + gpuModel: undefined, + memoryInfo: undefined, + availableGPUs: 0, + swarmEnabled: false, + gpuResources: 0, + }; + } +} + +export async function setupGPUSupport(serverId?: string): Promise { + try { + // 1. Check current GPU status first + const initialStatus = await checkGPUStatus(serverId); + + // If GPU is already configured, just verify and return quickly + if ( + initialStatus.swarmEnabled && + initialStatus.runtimeConfigured && + initialStatus.driverInstalled + ) { + console.log("GPU already configured, skipping setup"); + return; + } + + // 2. Verify GPU prerequisites + if (!initialStatus.driverInstalled || !initialStatus.runtimeInstalled) { + throw new Error( + "NVIDIA drivers or runtime not installed. Please install them first.", + ); + } + + // Get the node ID + const nodeIdCommand = 'docker info --format "{{.Swarm.NodeID}}"'; + const { stdout: nodeId } = serverId + ? await execAsyncRemote(serverId, nodeIdCommand) + : await execAsync(nodeIdCommand); + + if (!nodeId.trim()) { + throw new Error("Setup Server before enabling GPU support"); + } + + // 3. Configure NVIDIA runtime in daemon.json + const daemonConfig = { + runtimes: { + nvidia: { + path: "nvidia-container-runtime", + runtimeArgs: [], + }, + }, + "default-runtime": "nvidia", + "node-generic-resources": [`GPU=${initialStatus.availableGPUs}`], + }; + + const setupCommands = [ + "sudo -n true", + `echo '${JSON.stringify(daemonConfig, null, 2)}' | sudo tee /etc/docker/daemon.json`, + "sudo mkdir -p /etc/nvidia-container-runtime", + 'echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" | sudo tee -a /etc/nvidia-container-runtime/config.toml', + "sudo systemctl daemon-reload", + "sudo systemctl restart docker", + ].join(" && "); + + if (serverId) { + await execAsyncRemote(serverId, setupCommands); + } else { + await execAsync(setupCommands); + } + + // 4. Reduced wait time for Docker restart + await new Promise((resolve) => setTimeout(resolve, 10000)); + + // 5. Add GPU label to the node + const labelCommand = `docker node update --label-add gpu=true ${nodeId.trim()}`; + if (serverId) { + await execAsyncRemote(serverId, labelCommand); + } else { + await execAsync(labelCommand); + } + + // 6. Quick final verification + await new Promise((resolve) => setTimeout(resolve, 5000)); + const finalStatus = await checkGPUStatus(serverId); + + if (!finalStatus.swarmEnabled) { + const diagnosticCommands = [ + `docker node inspect ${nodeId.trim()}`, + 'nvidia-smi -a | grep "GPU UUID"', + "cat /etc/docker/daemon.json", + "cat /etc/nvidia-container-runtime/config.toml", + ].join(" && "); + + const { stdout: diagnostics } = serverId + ? await execAsyncRemote(serverId, diagnosticCommands) + : await execAsync(diagnosticCommands); + + console.error("Diagnostic Information:", diagnostics); + throw new Error("GPU support not detected in swarm after setup"); + } + + console.log("GPU setup completed successfully:", { + availableGPUs: initialStatus.availableGPUs, + driverVersion: initialStatus.driverVersion, + nodeId: nodeId.trim(), + }); + } catch (error) { + console.error("GPU Setup Error:", error); + throw error; + } +} From ed7150fac10e0ea7645cd3f0a72b0c1406f33f6c Mon Sep 17 00:00:00 2001 From: vishalkadam47 Date: Sun, 3 Nov 2024 04:16:51 +0530 Subject: [PATCH 06/58] fix: Remove unused imports and interfaces from gpu-setup.ts --- packages/server/src/utils/gpu-setup.ts | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/packages/server/src/utils/gpu-setup.ts b/packages/server/src/utils/gpu-setup.ts index 71f3bf0f..f1936bf6 100644 --- a/packages/server/src/utils/gpu-setup.ts +++ b/packages/server/src/utils/gpu-setup.ts @@ -1,7 +1,5 @@ -import { docker } from "../constants"; import { execAsync } from "../utils/process/execAsync"; import { execAsyncRemote } from "../utils/process/execAsync"; -import { getRemoteDocker } from "./servers/remote-docker"; interface GPUInfo { driverInstalled: boolean; @@ -17,16 +15,6 @@ interface GPUInfo { gpuResources: number; } -interface DiscreteResourceSpec { - Kind: string; - Value: number; -} - -interface NamedGenericResource { - NamedResourceSpec?: { Kind: string; Value: string }; - DiscreteResourceSpec?: DiscreteResourceSpec; -} - export async function checkGPUStatus(serverId?: string): Promise { try { // Check NVIDIA Driver From 7306d8c5139f4c41c2b2334c0a3c8d2432c44c88 Mon Sep 17 00:00:00 2001 From: vishalkadam47 Date: Sun, 3 Nov 2024 21:34:03 +0530 Subject: [PATCH 07/58] feat: Add GPU configuration and Update import path for gpu-setup functions --- .../settings/servers/gpu-support.tsx | 219 ++++++++++++++++++ apps/dokploy/server/api/routers/settings.ts | 2 +- 2 files changed, 220 insertions(+), 1 deletion(-) create mode 100644 apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx diff --git a/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx b/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx new file mode 100644 index 00000000..a0ef8d80 --- /dev/null +++ b/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx @@ -0,0 +1,219 @@ +import { Button } from '@/components/ui/button'; +import { useState } from 'react'; +import { api } from '@/utils/api'; +import { toast } from 'sonner'; +import { TRPCClientError } from '@trpc/client'; +import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'; +import { DialogAction } from '@/components/shared/dialog-action'; +import { AlertBlock } from '@/components/shared/alert-block'; +import { Cpu, CheckCircle2, XCircle, Loader2 } from 'lucide-react'; + +interface GPUSupportProps { + serverId?: string; +} + +export function GPUSupport({ serverId }: GPUSupportProps) { + const [isLoading, setIsLoading] = useState(false); + const utils = api.useContext(); + + const { data: gpuStatus, isLoading: isChecking } = api.settings.checkGPUStatus.useQuery( + { serverId }, + { + enabled: !!serverId, + refetchInterval: 5000 + } + ); + +const setupGPU = api.settings.setupGPU.useMutation({ + onMutate: () => { + setIsLoading(true); + }, + onSuccess: async () => { + toast.success('GPU support enabled successfully'); + setIsLoading(false); + + await Promise.all([ + utils.settings.checkGPUStatus.invalidate({ serverId }), + utils.server.invalidate() + ]); + }, + onError: (error) => { + if (error instanceof TRPCClientError) { + const errorMessage = error.message; + if (errorMessage.includes('permission denied')) { + toast.error('Permission denied. Please ensure proper sudo access.'); + } else if (errorMessage.includes('Failed to configure GPU')) { + toast.error('GPU configuration failed. Please check system requirements.'); + } else { + toast.error(errorMessage); + } + } else { + toast.error('Failed to enable GPU support. Please check server logs.'); + } + + setIsLoading(false); + } +}); + + const handleEnableGPU = async () => { + if (!serverId) { + toast.error('No server selected'); + return; + } + + try { + await setupGPU.mutateAsync({ serverId }); + } catch (error) { + // Error handling is done in mutation's onError + } + }; + + return ( + +
+ + +
+
+
+ + GPU Configuration +
+ Configure and monitor GPU support +
+ + + +
+
+ + + +
System Requirements:
+
    +
  • NVIDIA drivers must be installed on the host system
  • +
  • NVIDIA Container Runtime is required for GPU support
  • +
  • Compatible GPU hardware must be present
  • +
+
+ + {isChecking ? ( +
+ + Checking GPU status... +
+ ) : ( +
+ {/* Prerequisites Section */} +
+

Prerequisites

+

Shows all software checks and available hardware

+
+ + + + + + +
+
+ + {/* Configuration Status */} +
+

Docker Swarm GPU Status

+

Shows the configuration state that changes with the Enable GPU

+
+ + +
+
+
+ )} +
+
+
+
+ ); +} + +interface StatusRowProps { + label: string; + isEnabled?: boolean; + description?: string; + value?: string | number; + showIcon?: boolean; +} + +function StatusRow({ label, isEnabled, description, value, showIcon = true }: StatusRowProps) { + return ( +
+ {label} +
+ {showIcon ? ( + <> + {isEnabled ? ( + + ) : ( + + )} + + {description || (isEnabled ? 'Installed' : 'Not Installed')} + + + ) : ( + {value} + )} +
+
+ ); +} \ No newline at end of file diff --git a/apps/dokploy/server/api/routers/settings.ts b/apps/dokploy/server/api/routers/settings.ts index 13f67126..94167a2e 100644 --- a/apps/dokploy/server/api/routers/settings.ts +++ b/apps/dokploy/server/api/routers/settings.ts @@ -55,7 +55,7 @@ import { import { checkGPUStatus, setupGPUSupport, -} from "@dokploy/server/src/utils/gpu-setup"; +} from "@dokploy/server"; import { generateOpenApiDocument } from "@dokploy/trpc-openapi"; import { TRPCError } from "@trpc/server"; import { sql } from "drizzle-orm"; From b53da82204eb5a9f180d78d1f7f52356357868e5 Mon Sep 17 00:00:00 2001 From: vishalkadam47 Date: Tue, 5 Nov 2024 12:07:35 +0530 Subject: [PATCH 08/58] refactor: gpu support component and related api routers; update template environment variables --- .../settings/servers/gpu-support.tsx | 443 ++++++++++-------- apps/dokploy/server/api/routers/settings.ts | 5 +- apps/dokploy/templates/blender/index.ts | 12 +- 3 files changed, 249 insertions(+), 211 deletions(-) diff --git a/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx b/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx index a0ef8d80..ae931a3a 100644 --- a/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx +++ b/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx @@ -1,219 +1,260 @@ -import { Button } from '@/components/ui/button'; -import { useState } from 'react'; -import { api } from '@/utils/api'; -import { toast } from 'sonner'; -import { TRPCClientError } from '@trpc/client'; -import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'; -import { DialogAction } from '@/components/shared/dialog-action'; -import { AlertBlock } from '@/components/shared/alert-block'; -import { Cpu, CheckCircle2, XCircle, Loader2 } from 'lucide-react'; +import { AlertBlock } from "@/components/shared/alert-block"; +import { DialogAction } from "@/components/shared/dialog-action"; +import { Button } from "@/components/ui/button"; +import { + Card, + CardContent, + CardDescription, + CardHeader, + CardTitle, +} from "@/components/ui/card"; +import { api } from "@/utils/api"; +import { TRPCClientError } from "@trpc/client"; +import { CheckCircle2, Cpu, Loader2, XCircle } from "lucide-react"; +import { useState } from "react"; +import { toast } from "sonner"; interface GPUSupportProps { - serverId?: string; + serverId?: string; } export function GPUSupport({ serverId }: GPUSupportProps) { - const [isLoading, setIsLoading] = useState(false); - const utils = api.useContext(); + const [isLoading, setIsLoading] = useState(false); + const utils = api.useContext(); - const { data: gpuStatus, isLoading: isChecking } = api.settings.checkGPUStatus.useQuery( - { serverId }, - { - enabled: !!serverId, - refetchInterval: 5000 - } - ); + const { data: gpuStatus, isLoading: isChecking } = + api.settings.checkGPUStatus.useQuery( + { serverId }, + { + enabled: !!serverId, + refetchInterval: 5000, + }, + ); -const setupGPU = api.settings.setupGPU.useMutation({ - onMutate: () => { - setIsLoading(true); - }, - onSuccess: async () => { - toast.success('GPU support enabled successfully'); - setIsLoading(false); - - await Promise.all([ - utils.settings.checkGPUStatus.invalidate({ serverId }), - utils.server.invalidate() - ]); - }, - onError: (error) => { - if (error instanceof TRPCClientError) { - const errorMessage = error.message; - if (errorMessage.includes('permission denied')) { - toast.error('Permission denied. Please ensure proper sudo access.'); - } else if (errorMessage.includes('Failed to configure GPU')) { - toast.error('GPU configuration failed. Please check system requirements.'); - } else { - toast.error(errorMessage); - } - } else { - toast.error('Failed to enable GPU support. Please check server logs.'); - } - - setIsLoading(false); - } -}); + const setupGPU = api.settings.setupGPU.useMutation({ + onMutate: () => { + setIsLoading(true); + }, + onSuccess: async () => { + toast.success("GPU support enabled successfully"); + setIsLoading(false); - const handleEnableGPU = async () => { - if (!serverId) { - toast.error('No server selected'); - return; - } + await Promise.all([ + utils.settings.checkGPUStatus.invalidate({ serverId }), + utils.server.invalidate(), + ]); + }, + onError: (error) => { + if (error instanceof TRPCClientError) { + const errorMessage = error.message; + if (errorMessage.includes("permission denied")) { + toast.error("Permission denied. Please ensure proper sudo access."); + } else if (errorMessage.includes("Failed to configure GPU")) { + toast.error( + "GPU configuration failed. Please check system requirements.", + ); + } else { + toast.error(errorMessage); + } + } else { + toast.error("Failed to enable GPU support. Please check server logs."); + } - try { - await setupGPU.mutateAsync({ serverId }); - } catch (error) { - // Error handling is done in mutation's onError - } - }; + setIsLoading(false); + }, + }); - return ( - -
- - -
-
-
- - GPU Configuration -
- Configure and monitor GPU support -
- - - -
-
+ const handleEnableGPU = async () => { + if (!serverId) { + toast.error("No server selected"); + return; + } - - -
System Requirements:
-
    -
  • NVIDIA drivers must be installed on the host system
  • -
  • NVIDIA Container Runtime is required for GPU support
  • -
  • Compatible GPU hardware must be present
  • -
-
+ try { + await setupGPU.mutateAsync({ serverId }); + } catch (error) { + // Error handling is done in mutation's onError + } + }; - {isChecking ? ( -
- - Checking GPU status... -
- ) : ( -
- {/* Prerequisites Section */} -
-

Prerequisites

-

Shows all software checks and available hardware

-
- - - - - - -
-
+ return ( + +
+ + +
+
+
+ + GPU Configuration +
+ + Configure and monitor GPU support + +
+ + + +
+
- {/* Configuration Status */} -
-

Docker Swarm GPU Status

-

Shows the configuration state that changes with the Enable GPU

-
- - -
-
-
- )} -
- -
-
- ); + + +
System Requirements:
+
    +
  • NVIDIA drivers must be installed on the host system
  • +
  • NVIDIA Container Runtime is required for GPU support
  • +
  • Compatible GPU hardware must be present
  • +
+
+ + {isChecking ? ( +
+ + Checking GPU status... +
+ ) : ( +
+ {/* Prerequisites Section */} +
+

Prerequisites

+

+ Shows all software checks and available hardware +

+
+ + + + + + +
+
+ + {/* Configuration Status */} +
+

+ Docker Swarm GPU Status +

+

+ Shows the configuration state that changes with the Enable + GPU +

+
+ + +
+
+
+ )} +
+
+
+
+ ); } interface StatusRowProps { - label: string; - isEnabled?: boolean; - description?: string; - value?: string | number; - showIcon?: boolean; + label: string; + isEnabled?: boolean; + description?: string; + value?: string | number; + showIcon?: boolean; } -function StatusRow({ label, isEnabled, description, value, showIcon = true }: StatusRowProps) { - return ( -
- {label} -
- {showIcon ? ( - <> - {isEnabled ? ( - - ) : ( - - )} - - {description || (isEnabled ? 'Installed' : 'Not Installed')} - - - ) : ( - {value} - )} -
-
- ); -} \ No newline at end of file +function StatusRow({ + label, + isEnabled, + description, + value, + showIcon = true, +}: StatusRowProps) { + return ( +
+ {label} +
+ {showIcon ? ( + <> + {isEnabled ? ( + + ) : ( + + )} + + {description || (isEnabled ? "Installed" : "Not Installed")} + + + ) : ( + {value} + )} +
+
+ ); +} diff --git a/apps/dokploy/server/api/routers/settings.ts b/apps/dokploy/server/api/routers/settings.ts index 46529282..608a5028 100644 --- a/apps/dokploy/server/api/routers/settings.ts +++ b/apps/dokploy/server/api/routers/settings.ts @@ -52,10 +52,7 @@ import { writeMainConfig, writeTraefikConfigInPath, } from "@dokploy/server"; -import { - checkGPUStatus, - setupGPUSupport, -} from "@dokploy/server"; +import { checkGPUStatus, setupGPUSupport } from "@dokploy/server"; import { generateOpenApiDocument } from "@dokploy/trpc-openapi"; import { TRPCError } from "@trpc/server"; import { sql } from "drizzle-orm"; diff --git a/apps/dokploy/templates/blender/index.ts b/apps/dokploy/templates/blender/index.ts index baf243e0..84e52755 100644 --- a/apps/dokploy/templates/blender/index.ts +++ b/apps/dokploy/templates/blender/index.ts @@ -19,12 +19,12 @@ export function generate(schema: Schema): Template { ]; const envs = [ - `PUID=1000`, - `PGID=1000`, - `TZ=Etc/UTC`, - `SUBFOLDER=/`, - `NVIDIA_VISIBLE_DEVICES=all`, - `NVIDIA_DRIVER_CAPABILITIES=all`, + "PUID=1000", + "PGID=1000", + "TZ=Etc/UTC", + "SUBFOLDER=/", + "NVIDIA_VISIBLE_DEVICES=all", + "NVIDIA_DRIVER_CAPABILITIES=all", ]; return { From 2e6d9c34c0bdc61d9c4d8fb759f2ea9fddcbe654 Mon Sep 17 00:00:00 2001 From: vishalkadam47 Date: Thu, 7 Nov 2024 02:52:41 +0530 Subject: [PATCH 09/58] feat: add dokploy server gpu setup --- .../servers/actions/show-dokploy-actions.tsx | 2 + .../settings/servers/gpu-support-modal.tsx | 36 +++++++++++++++ .../settings/servers/gpu-support.tsx | 26 ++++++----- apps/dokploy/server/api/routers/settings.ts | 37 +++++++-------- packages/server/src/utils/gpu-setup.ts | 45 +++++++++++++++---- 5 files changed, 104 insertions(+), 42 deletions(-) create mode 100644 apps/dokploy/components/dashboard/settings/servers/gpu-support-modal.tsx diff --git a/apps/dokploy/components/dashboard/settings/servers/actions/show-dokploy-actions.tsx b/apps/dokploy/components/dashboard/settings/servers/actions/show-dokploy-actions.tsx index 49f6772b..9b12af84 100644 --- a/apps/dokploy/components/dashboard/settings/servers/actions/show-dokploy-actions.tsx +++ b/apps/dokploy/components/dashboard/settings/servers/actions/show-dokploy-actions.tsx @@ -13,6 +13,7 @@ import { import { api } from "@/utils/api"; import { toast } from "sonner"; import { ShowModalLogs } from "../../web-server/show-modal-logs"; +import { GPUSupportModal } from "../gpu-support-modal"; export const ShowDokployActions = () => { const { mutateAsync: reloadServer, isLoading } = @@ -45,6 +46,7 @@ export const ShowDokployActions = () => { Watch logs + diff --git a/apps/dokploy/components/dashboard/settings/servers/gpu-support-modal.tsx b/apps/dokploy/components/dashboard/settings/servers/gpu-support-modal.tsx new file mode 100644 index 00000000..9cf858cd --- /dev/null +++ b/apps/dokploy/components/dashboard/settings/servers/gpu-support-modal.tsx @@ -0,0 +1,36 @@ +import { + Dialog, + DialogContent, + DialogHeader, + DialogTitle, + DialogTrigger, +} from "@/components/ui/dialog"; +import { DropdownMenuItem } from "@/components/ui/dropdown-menu"; +import { useState } from "react"; +import { GPUSupport } from "./gpu-support"; + +export const GPUSupportModal = () => { + const [isOpen, setIsOpen] = useState(false); + + return ( + + + e.preventDefault()} + > + GPU Setup + + + + + + Dokploy Server GPU Setup + + + + + + + ); +}; diff --git a/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx b/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx index ae931a3a..d0c178c4 100644 --- a/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx +++ b/apps/dokploy/components/dashboard/settings/servers/gpu-support.tsx @@ -26,7 +26,7 @@ export function GPUSupport({ serverId }: GPUSupportProps) { api.settings.checkGPUStatus.useQuery( { serverId }, { - enabled: !!serverId, + enabled: serverId !== undefined, refetchInterval: 5000, }, ); @@ -38,17 +38,20 @@ export function GPUSupport({ serverId }: GPUSupportProps) { onSuccess: async () => { toast.success("GPU support enabled successfully"); setIsLoading(false); - - await Promise.all([ - utils.settings.checkGPUStatus.invalidate({ serverId }), - utils.server.invalidate(), - ]); + await utils.settings.checkGPUStatus.invalidate({ serverId }); }, onError: (error) => { if (error instanceof TRPCClientError) { const errorMessage = error.message; - if (errorMessage.includes("permission denied")) { - toast.error("Permission denied. Please ensure proper sudo access."); + if ( + errorMessage.includes( + "Permission denied. Please ensure proper sudo access.", + ) || + errorMessage.includes("sudo access required") + ) { + toast.error( + "Administrator privileges required. Please enter your password when prompted.", + ); } else if (errorMessage.includes("Failed to configure GPU")) { toast.error( "GPU configuration failed. Please check system requirements.", @@ -59,13 +62,12 @@ export function GPUSupport({ serverId }: GPUSupportProps) { } else { toast.error("Failed to enable GPU support. Please check server logs."); } - setIsLoading(false); }, }); const handleEnableGPU = async () => { - if (!serverId) { + if (serverId === undefined) { toast.error("No server selected"); return; } @@ -99,7 +101,7 @@ export function GPUSupport({ serverId }: GPUSupportProps) { >