diff --git a/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx b/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx
index 8bfcf4da..119d4d29 100644
--- a/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx
+++ b/apps/dokploy/components/dashboard/settings/servers/setup-server.tsx
@@ -32,6 +32,7 @@ import Link from "next/link";
import { useState } from "react";
import { toast } from "sonner";
import { ShowDeployment } from "../../application/deployments/show-deployment";
+import { GPUSupport } from "./gpu-support";
interface Props {
serverId: string;
@@ -89,9 +90,10 @@ export const SetupServer = ({ serverId }: Props) => {
) : (
-
+
SSH Keys
Deployments
+ GPU Setup
{
+
+
+
+
+
)}
diff --git a/apps/dokploy/server/api/routers/settings.ts b/apps/dokploy/server/api/routers/settings.ts
index e1e63579..4a000889 100644
--- a/apps/dokploy/server/api/routers/settings.ts
+++ b/apps/dokploy/server/api/routers/settings.ts
@@ -52,6 +52,10 @@ import {
writeMainConfig,
writeTraefikConfigInPath,
} from "@dokploy/server";
+import {
+ checkGPUStatus,
+ setupGPUSupport,
+} from "@dokploy/server/src/utils/gpu-setup";
import { generateOpenApiDocument } from "@dokploy/trpc-openapi";
import { TRPCError } from "@trpc/server";
import { sql } from "drizzle-orm";
@@ -650,6 +654,62 @@ export const settingsRouter = createTRPCRouter({
}
return { status: "not_cloud" };
}),
+ setupGPU: adminProcedure
+ .input(
+ z.object({
+ serverId: z.string(),
+ }),
+ )
+ .mutation(async ({ input }) => {
+ try {
+ if (IS_CLOUD) {
+ return { success: true };
+ }
+
+ if (!input.serverId) {
+ throw new TRPCError({
+ code: "BAD_REQUEST",
+ message: "Server ID is required",
+ });
+ }
+
+ await setupGPUSupport(input.serverId);
+ return { success: true };
+ } catch (error) {
+ throw new TRPCError({
+ code: "INTERNAL_SERVER_ERROR",
+ message:
+ error instanceof Error
+ ? error.message
+ : "Failed to enable GPU support",
+ cause: error,
+ });
+ }
+ }),
+ checkGPUStatus: adminProcedure
+ .input(
+ z.object({
+ serverId: z.string().optional(),
+ }),
+ )
+ .query(async ({ input }) => {
+ if (IS_CLOUD) {
+ return {
+ driverInstalled: false,
+ driverVersion: undefined,
+ gpuModel: undefined,
+ runtimeInstalled: false,
+ runtimeConfigured: false,
+        cudaSupport: false,
+ cudaVersion: undefined,
+ memoryInfo: undefined,
+ availableGPUs: 0,
+ swarmEnabled: false,
+ gpuResources: 0,
+ };
+ }
+ return await checkGPUStatus(input.serverId);
+ }),
});
// {
// "Parallelism": 1,
diff --git a/apps/dokploy/server/api/trpc.ts b/apps/dokploy/server/api/trpc.ts
index 8aec99ec..db4f7adf 100644
--- a/apps/dokploy/server/api/trpc.ts
+++ b/apps/dokploy/server/api/trpc.ts
@@ -21,8 +21,6 @@ import {
import type { Session, User } from "lucia";
import superjson from "superjson";
import { ZodError } from "zod";
-import { setupGPUSupport } from '@dokploy/server/src/utils/gpu-setup';
-
/**
* 1. CONTEXT
*
@@ -209,10 +207,3 @@ export const adminProcedure = t.procedure.use(({ ctx, next }) => {
},
});
});
-
-const appRouter = t.router({
- setupGPU: t.procedure.mutation(async () => {
- await setupGPUSupport();
- return { success: true };
- }),
- });
\ No newline at end of file
diff --git a/apps/dokploy/templates/blender/index.ts b/apps/dokploy/templates/blender/index.ts
index 088e6fcc..baf243e0 100644
--- a/apps/dokploy/templates/blender/index.ts
+++ b/apps/dokploy/templates/blender/index.ts
@@ -1,34 +1,34 @@
import {
- generateHash,
- generateRandomDomain,
- type Template,
- type Schema,
- type DomainSchema,
+ type DomainSchema,
+ type Schema,
+ type Template,
+ generateHash,
+ generateRandomDomain,
} from "../utils";
export function generate(schema: Schema): Template {
- const mainServiceHash = generateHash(schema.projectName);
- const mainDomain = generateRandomDomain(schema);
+ const mainServiceHash = generateHash(schema.projectName);
+ const mainDomain = generateRandomDomain(schema);
- const domains: DomainSchema[] = [
- {
- host: mainDomain,
- port: 3000,
- serviceName: "blender",
- },
- ];
+ const domains: DomainSchema[] = [
+ {
+ host: mainDomain,
+ port: 3000,
+ serviceName: "blender",
+ },
+ ];
- const envs = [
- `PUID=1000`,
- `PGID=1000`,
- `TZ=Etc/UTC`,
- `SUBFOLDER=/`,
- `NVIDIA_VISIBLE_DEVICES=all`,
- `NVIDIA_DRIVER_CAPABILITIES=all`,
- ];
+ const envs = [
+ `PUID=1000`,
+ `PGID=1000`,
+ `TZ=Etc/UTC`,
+ `SUBFOLDER=/`,
+ `NVIDIA_VISIBLE_DEVICES=all`,
+ `NVIDIA_DRIVER_CAPABILITIES=all`,
+ ];
- return {
- envs,
- domains,
- };
+ return {
+ envs,
+ domains,
+ };
}
diff --git a/apps/dokploy/templates/templates.ts b/apps/dokploy/templates/templates.ts
index 40d493e5..115a1ecf 100644
--- a/apps/dokploy/templates/templates.ts
+++ b/apps/dokploy/templates/templates.ts
@@ -516,7 +516,8 @@ export const templates: TemplateData[] = [
id: "blender",
name: "Blender",
version: "latest",
- description: "Blender is a free and open-source 3D creation suite. It supports the entire 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, video editing and 2D animation pipeline.",
+ description:
+ "Blender is a free and open-source 3D creation suite. It supports the entire 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, video editing and 2D animation pipeline.",
logo: "blender.svg",
links: {
github: "https://github.com/linuxserver/docker-blender",
diff --git a/packages/server/src/constants/index.ts b/packages/server/src/constants/index.ts
index be2a72de..f2f1a4d8 100644
--- a/packages/server/src/constants/index.ts
+++ b/packages/server/src/constants/index.ts
@@ -36,4 +36,4 @@ export const paths = (isServer = false) => {
MONITORING_PATH: `${BASE_PATH}/monitoring`,
REGISTRY_PATH: `${BASE_PATH}/registry`,
};
-};
\ No newline at end of file
+};
diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts
index 06f2bc87..90daec2d 100644
--- a/packages/server/src/index.ts
+++ b/packages/server/src/index.ts
@@ -118,3 +118,4 @@ export * from "./monitoring/utilts";
export * from "./db/validations/domain";
export * from "./db/validations/index";
+export * from "./utils/gpu-setup";
diff --git a/packages/server/src/utils/gpu-setup.ts b/packages/server/src/utils/gpu-setup.ts
index 459c3395..71f3bf0f 100644
--- a/packages/server/src/utils/gpu-setup.ts
+++ b/packages/server/src/utils/gpu-setup.ts
@@ -1,9 +1,261 @@
-import { docker } from '../constants';
+import { docker } from "../constants";
+import { execAsync } from "../utils/process/execAsync";
+import { execAsyncRemote } from "../utils/process/execAsync";
+import { getRemoteDocker } from "./servers/remote-docker";
-export async function setupGPUSupport() {
- await docker.swarmUpdate({
- TaskDefaults: {
- GenericResources: [{ DiscreteResourceSpec: { Kind: 'gpu', Value: 1 } }]
- }
- });
-}
\ No newline at end of file
+interface GPUInfo {
+ driverInstalled: boolean;
+ driverVersion?: string;
+ gpuModel?: string;
+ runtimeInstalled: boolean;
+ runtimeConfigured: boolean;
+ cudaSupport: boolean;
+ cudaVersion?: string;
+ memoryInfo?: string;
+ availableGPUs: number;
+ swarmEnabled: boolean;
+ gpuResources: number;
+}
+
+interface DiscreteResourceSpec {
+ Kind: string;
+ Value: number;
+}
+
+interface NamedGenericResource {
+ NamedResourceSpec?: { Kind: string; Value: string };
+ DiscreteResourceSpec?: DiscreteResourceSpec;
+}
+
+export async function checkGPUStatus(serverId?: string): Promise<GPUInfo> {
+ try {
+ // Check NVIDIA Driver
+ let driverInstalled = false;
+ let driverVersion: string | undefined;
+ let availableGPUs = 0;
+
+ try {
+ const driverCommand =
+ "nvidia-smi --query-gpu=driver_version --format=csv,noheader";
+ const { stdout: nvidiaSmi } = serverId
+ ? await execAsyncRemote(serverId, driverCommand)
+ : await execAsync(driverCommand);
+
+ driverVersion = nvidiaSmi.trim();
+ if (driverVersion) {
+ driverInstalled = true;
+ const countCommand =
+ "nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l";
+ const { stdout: gpuCount } = serverId
+ ? await execAsyncRemote(serverId, countCommand)
+ : await execAsync(countCommand);
+
+ availableGPUs = Number.parseInt(gpuCount.trim(), 10);
+ }
+ } catch (error) {
+ console.debug("GPU driver check:", error);
+ }
+
+ // Check Runtime Configuration
+ let runtimeInstalled = false;
+ let runtimeConfigured = false;
+ try {
+ const runtimeCommand = 'docker info --format "{{json .Runtimes}}"';
+ const { stdout: runtimeInfo } = serverId
+ ? await execAsyncRemote(serverId, runtimeCommand)
+ : await execAsync(runtimeCommand);
+
+ const runtimes = JSON.parse(runtimeInfo);
+ runtimeInstalled = "nvidia" in runtimes;
+
+ // Check if it's the default runtime
+ const defaultCommand = 'docker info --format "{{.DefaultRuntime}}"';
+ const { stdout: defaultRuntime } = serverId
+ ? await execAsyncRemote(serverId, defaultCommand)
+ : await execAsync(defaultCommand);
+
+ runtimeConfigured = defaultRuntime.trim() === "nvidia";
+ } catch (error) {
+ console.debug("Runtime check:", error);
+ }
+
+ // Check Swarm GPU Resources
+ let swarmEnabled = false;
+ let gpuResources = 0;
+
+ try {
+ // Check node resources directly from inspect
+ const nodeCommand =
+ "docker node inspect self --format '{{json .Description.Resources.GenericResources}}'";
+ const { stdout: resources } = serverId
+ ? await execAsyncRemote(serverId, nodeCommand)
+ : await execAsync(nodeCommand);
+
+ if (resources && resources !== "null") {
+ const genericResources = JSON.parse(resources);
+ for (const resource of genericResources) {
+ if (
+ resource.DiscreteResourceSpec &&
+ (resource.DiscreteResourceSpec.Kind === "GPU" ||
+ resource.DiscreteResourceSpec.Kind === "gpu")
+ ) {
+ gpuResources = resource.DiscreteResourceSpec.Value;
+ swarmEnabled = true;
+ break;
+ }
+ }
+ }
+ } catch (error) {
+ console.debug("Swarm resource check:", error);
+ }
+
+ // Get GPU Model and Memory Info
+ const gpuInfoCommand =
+ "nvidia-smi --query-gpu=gpu_name,memory.total --format=csv,noheader";
+ const { stdout: gpuInfo } = serverId
+ ? await execAsyncRemote(serverId, gpuInfoCommand)
+ : await execAsync(gpuInfoCommand);
+
+ const [gpuModel, memoryTotal] = gpuInfo.split(",").map((s) => s.trim());
+
+ // Check CUDA Support
+ const cudaCommand = 'nvidia-smi -q | grep "CUDA Version"';
+ const { stdout: cudaInfo } = serverId
+ ? await execAsyncRemote(serverId, cudaCommand)
+ : await execAsync(cudaCommand);
+
+ const cudaMatch = cudaInfo.match(/CUDA Version\s*:\s*([\d\.]+)/);
+ const cudaVersion = cudaMatch ? cudaMatch[1] : undefined;
+ const cudaSupport = !!cudaVersion;
+
+ return {
+ driverInstalled,
+ driverVersion,
+ runtimeInstalled,
+ runtimeConfigured,
+ availableGPUs,
+ swarmEnabled,
+ gpuResources,
+ gpuModel,
+ memoryInfo: memoryTotal,
+ cudaSupport,
+ cudaVersion,
+ };
+ } catch (error) {
+ console.error("Error in checkGPUStatus:", error);
+ return {
+ driverInstalled: false,
+ driverVersion: undefined,
+ runtimeInstalled: false,
+ runtimeConfigured: false,
+ cudaSupport: false,
+ cudaVersion: undefined,
+ gpuModel: undefined,
+ memoryInfo: undefined,
+ availableGPUs: 0,
+ swarmEnabled: false,
+ gpuResources: 0,
+ };
+ }
+}
+
+export async function setupGPUSupport(serverId?: string): Promise<void> {
+ try {
+ // 1. Check current GPU status first
+ const initialStatus = await checkGPUStatus(serverId);
+
+ // If GPU is already configured, just verify and return quickly
+ if (
+ initialStatus.swarmEnabled &&
+ initialStatus.runtimeConfigured &&
+ initialStatus.driverInstalled
+ ) {
+ console.log("GPU already configured, skipping setup");
+ return;
+ }
+
+ // 2. Verify GPU prerequisites
+ if (!initialStatus.driverInstalled || !initialStatus.runtimeInstalled) {
+ throw new Error(
+ "NVIDIA drivers or runtime not installed. Please install them first.",
+ );
+ }
+
+ // Get the node ID
+ const nodeIdCommand = 'docker info --format "{{.Swarm.NodeID}}"';
+ const { stdout: nodeId } = serverId
+ ? await execAsyncRemote(serverId, nodeIdCommand)
+ : await execAsync(nodeIdCommand);
+
+ if (!nodeId.trim()) {
+ throw new Error("Setup Server before enabling GPU support");
+ }
+
+ // 3. Configure NVIDIA runtime in daemon.json
+ const daemonConfig = {
+ runtimes: {
+ nvidia: {
+ path: "nvidia-container-runtime",
+ runtimeArgs: [],
+ },
+ },
+ "default-runtime": "nvidia",
+ "node-generic-resources": [`GPU=${initialStatus.availableGPUs}`],
+ };
+
+ const setupCommands = [
+ "sudo -n true",
+ `echo '${JSON.stringify(daemonConfig, null, 2)}' | sudo tee /etc/docker/daemon.json`,
+ "sudo mkdir -p /etc/nvidia-container-runtime",
+ 'echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" | sudo tee -a /etc/nvidia-container-runtime/config.toml',
+ "sudo systemctl daemon-reload",
+ "sudo systemctl restart docker",
+ ].join(" && ");
+
+ if (serverId) {
+ await execAsyncRemote(serverId, setupCommands);
+ } else {
+ await execAsync(setupCommands);
+ }
+
+ // 4. Reduced wait time for Docker restart
+ await new Promise((resolve) => setTimeout(resolve, 10000));
+
+ // 5. Add GPU label to the node
+ const labelCommand = `docker node update --label-add gpu=true ${nodeId.trim()}`;
+ if (serverId) {
+ await execAsyncRemote(serverId, labelCommand);
+ } else {
+ await execAsync(labelCommand);
+ }
+
+ // 6. Quick final verification
+ await new Promise((resolve) => setTimeout(resolve, 5000));
+ const finalStatus = await checkGPUStatus(serverId);
+
+ if (!finalStatus.swarmEnabled) {
+ const diagnosticCommands = [
+ `docker node inspect ${nodeId.trim()}`,
+ 'nvidia-smi -a | grep "GPU UUID"',
+ "cat /etc/docker/daemon.json",
+ "cat /etc/nvidia-container-runtime/config.toml",
+ ].join(" && ");
+
+ const { stdout: diagnostics } = serverId
+ ? await execAsyncRemote(serverId, diagnosticCommands)
+ : await execAsync(diagnosticCommands);
+
+ console.error("Diagnostic Information:", diagnostics);
+ throw new Error("GPU support not detected in swarm after setup");
+ }
+
+ console.log("GPU setup completed successfully:", {
+ availableGPUs: initialStatus.availableGPUs,
+ driverVersion: initialStatus.driverVersion,
+ nodeId: nodeId.trim(),
+ });
+ } catch (error) {
+ console.error("GPU Setup Error:", error);
+ throw error;
+ }
+}