mirror of
https://github.com/Dokploy/dokploy
synced 2025-06-26 18:27:59 +00:00
Merge pull request #590 from wish-oss/feature/gpu-support-blender-template
feat: Implement Remote server and Dokploy Server - GPU Support for Docker Swarm
This commit is contained in:
@@ -13,6 +13,7 @@ import {
|
||||
import { api } from "@/utils/api";
|
||||
import { toast } from "sonner";
|
||||
import { ShowModalLogs } from "../../web-server/show-modal-logs";
|
||||
import { GPUSupportModal } from "../gpu-support-modal";
|
||||
|
||||
export const ShowDokployActions = () => {
|
||||
const { mutateAsync: reloadServer, isLoading } =
|
||||
@@ -45,6 +46,7 @@ export const ShowDokployActions = () => {
|
||||
<ShowModalLogs appName="dokploy">
|
||||
<span>Watch logs</span>
|
||||
</ShowModalLogs>
|
||||
<GPUSupportModal />
|
||||
</DropdownMenuGroup>
|
||||
</DropdownMenuContent>
|
||||
</DropdownMenu>
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
DialogTrigger,
|
||||
} from "@/components/ui/dialog";
|
||||
import { DropdownMenuItem } from "@/components/ui/dropdown-menu";
|
||||
import { useState } from "react";
|
||||
import { GPUSupport } from "./gpu-support";
|
||||
|
||||
/**
 * Dropdown-menu entry that opens the "Dokploy Server GPU Setup" dialog.
 *
 * The menu item acts as the dialog trigger; the dialog body mounts
 * `GPUSupport` with an empty `serverId`.
 *
 * NOTE(review): the `onSelect` handler cancels the item's default select
 * behaviour — presumably so the surrounding dropdown does not close (and
 * unmount the dialog) when the item is clicked; confirm against the menu
 * library's documentation.
 */
export const GPUSupportModal = () => {
  const [open, setOpen] = useState(false);

  return (
    <Dialog open={open} onOpenChange={setOpen}>
      <DialogTrigger asChild>
        <DropdownMenuItem
          className="w-full cursor-pointer"
          onSelect={(event) => event.preventDefault()}
        >
          <span>GPU Setup</span>
        </DropdownMenuItem>
      </DialogTrigger>
      <DialogContent className="sm:max-w-4xl overflow-y-auto max-h-screen">
        <DialogHeader>
          <DialogTitle className="flex items-center gap-2">
            Dokploy Server GPU Setup
          </DialogTitle>
        </DialogHeader>

        <GPUSupport serverId="" />
      </DialogContent>
    </Dialog>
  );
};
|
||||
@@ -0,0 +1,282 @@
|
||||
import { AlertBlock } from "@/components/shared/alert-block";
|
||||
import { DialogAction } from "@/components/shared/dialog-action";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import {
|
||||
Card,
|
||||
CardContent,
|
||||
CardDescription,
|
||||
CardHeader,
|
||||
CardTitle,
|
||||
} from "@/components/ui/card";
|
||||
import { api } from "@/utils/api";
|
||||
import { TRPCClientError } from "@trpc/client";
|
||||
import { CheckCircle2, Cpu, Loader2, RefreshCw, XCircle } from "lucide-react";
|
||||
import { useEffect, useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
|
||||
interface GPUSupportProps {
  /**
   * Server whose GPU state is shown and configured. When `undefined` the
   * status query is disabled and the enable button is inert.
   */
  serverId?: string;
}

/**
 * Card that shows the GPU prerequisites and Docker Swarm GPU configuration
 * reported by `settings.checkGPUStatus`, and lets the user trigger
 * `settings.setupGPU` for the given server.
 *
 * @param serverId Server to inspect/configure; see {@link GPUSupportProps}.
 */
export function GPUSupport({ serverId }: GPUSupportProps) {
  const [isLoading, setIsLoading] = useState(false);
  const [isRefreshing, setIsRefreshing] = useState(false);
  const utils = api.useContext();

  const {
    data: gpuStatus,
    isLoading: isChecking,
    refetch,
  } = api.settings.checkGPUStatus.useQuery(
    { serverId },
    {
      enabled: serverId !== undefined,
    },
  );

  const setupGPU = api.settings.setupGPU.useMutation({
    onMutate: () => {
      setIsLoading(true);
    },
    onSuccess: async () => {
      toast.success("GPU support enabled successfully");
      setIsLoading(false);
      await utils.settings.checkGPUStatus.invalidate({ serverId });
    },
    onError: (error) => {
      toast.error(
        error.message ||
          "Failed to enable GPU support. Please check server logs.",
      );
      setIsLoading(false);
    },
  });

  /**
   * Re-runs the status query once.
   *
   * A single `refetch` replaces the previous invalidate-then-refetch pair,
   * which fetched the status twice. `throwOnError` makes a failed fetch reject
   * so the error toast below is actually reachable.
   */
  const handleRefresh = async () => {
    // Manual refetch ignores `enabled`, so keep the disabled state honoured.
    if (serverId === undefined) {
      return;
    }

    setIsRefreshing(true);
    try {
      await refetch({ throwOnError: true });
    } catch {
      toast.error("Failed to refresh GPU status");
    } finally {
      setIsRefreshing(false);
    }
  };

  // Force a fresh status read each time the component mounts.
  useEffect(() => {
    handleRefresh();
  }, []);

  const handleEnableGPU = async () => {
    if (serverId === undefined) {
      toast.error("No server selected");
      return;
    }

    try {
      await setupGPU.mutateAsync({ serverId });
    } catch {
      // Error handling is done in mutation's onError
    }
  };

  return (
    <CardContent className="p-0">
      <div className="flex flex-col gap-4">
        <Card className="bg-background">
          <CardHeader className="flex flex-row items-center justify-between flex-wrap gap-2">
            <div className="flex flex-row gap-2 justify-between w-full items-end max-sm:flex-col">
              <div className="flex flex-col gap-1">
                <div className="flex items-center gap-2">
                  <Cpu className="size-5" />
                  <CardTitle className="text-xl">GPU Configuration</CardTitle>
                </div>
                <CardDescription>
                  Configure and monitor GPU support
                </CardDescription>
              </div>
              <div className="flex items-center gap-2">
                <DialogAction
                  title="Enable GPU Support?"
                  description="This will enable GPU support for Docker Swarm on this server. Make sure you have the required hardware and drivers installed."
                  onClick={handleEnableGPU}
                >
                  <Button
                    isLoading={isLoading}
                    disabled={isLoading || serverId === undefined || isChecking}
                  >
                    {isLoading
                      ? "Enabling GPU..."
                      : gpuStatus?.swarmEnabled
                        ? "Reconfigure GPU"
                        : "Enable GPU"}
                  </Button>
                </DialogAction>
                <Button
                  size="icon"
                  onClick={handleRefresh}
                  disabled={isChecking || isRefreshing}
                >
                  <RefreshCw
                    className={`h-5 w-5 ${isChecking || isRefreshing ? "animate-spin" : ""}`}
                  />
                </Button>
              </div>
            </div>
          </CardHeader>

          <CardContent className="flex flex-col gap-4">
            <AlertBlock type="info">
              <div className="font-medium mb-2">System Requirements:</div>
              <ul className="list-disc list-inside text-sm space-y-1">
                <li>NVIDIA GPU hardware must be physically installed</li>
                <li>
                  NVIDIA drivers must be installed and running (check with
                  nvidia-smi)
                </li>
                <li>
                  NVIDIA Container Runtime must be installed
                  (nvidia-container-runtime)
                </li>
                <li>User must have sudo/administrative privileges</li>
                <li>System must support CUDA for GPU acceleration</li>
              </ul>
            </AlertBlock>

            {isChecking ? (
              <div className="flex items-center justify-center text-muted-foreground py-4">
                <Loader2 className="mr-2 h-4 w-4 animate-spin" />
                <span>Checking GPU status...</span>
              </div>
            ) : (
              <div className="grid gap-4">
                {/* Prerequisites Section */}
                <div className="border rounded-lg p-4">
                  <h3 className="text-lg font-semibold mb-1">Prerequisites</h3>
                  <p className="text-sm text-muted-foreground mb-4">
                    Shows all software checks and available hardware
                  </p>
                  <div className="grid gap-2.5">
                    <StatusRow
                      label="NVIDIA Driver"
                      isEnabled={gpuStatus?.driverInstalled}
                      description={
                        gpuStatus?.driverVersion
                          ? `Installed (v${gpuStatus.driverVersion})`
                          : "Not Installed"
                      }
                    />
                    <StatusRow
                      label="GPU Model"
                      value={gpuStatus?.gpuModel || "Not Detected"}
                      showIcon={false}
                    />
                    <StatusRow
                      label="GPU Memory"
                      value={gpuStatus?.memoryInfo || "Not Available"}
                      showIcon={false}
                    />
                    <StatusRow
                      label="Available GPUs"
                      value={gpuStatus?.availableGPUs || 0}
                      showIcon={false}
                    />
                    <StatusRow
                      label="CUDA Support"
                      isEnabled={gpuStatus?.cudaSupport}
                      description={
                        gpuStatus?.cudaVersion
                          ? `Available (v${gpuStatus.cudaVersion})`
                          : "Not Available"
                      }
                    />
                    <StatusRow
                      label="NVIDIA Container Runtime"
                      isEnabled={gpuStatus?.runtimeInstalled}
                      description={
                        gpuStatus?.runtimeInstalled
                          ? "Installed"
                          : "Not Installed"
                      }
                    />
                  </div>
                </div>

                {/* Configuration Status */}
                <div className="border rounded-lg p-4">
                  <h3 className="text-lg font-semibold mb-1">
                    Docker Swarm GPU Status
                  </h3>
                  <p className="text-sm text-muted-foreground mb-4">
                    Shows the configuration state that changes with the Enable
                    GPU
                  </p>
                  <div className="grid gap-2.5">
                    <StatusRow
                      label="Runtime Configuration"
                      isEnabled={gpuStatus?.runtimeConfigured}
                      description={
                        gpuStatus?.runtimeConfigured
                          ? "Default Runtime"
                          : "Not Default Runtime"
                      }
                    />
                    <StatusRow
                      label="Swarm GPU Support"
                      isEnabled={gpuStatus?.swarmEnabled}
                      description={
                        gpuStatus?.swarmEnabled
                          ? `Enabled (${gpuStatus.gpuResources} GPU${gpuStatus.gpuResources !== 1 ? "s" : ""})`
                          : "Not Enabled"
                      }
                    />
                  </div>
                </div>
              </div>
            )}
          </CardContent>
        </Card>
      </div>
    </CardContent>
  );
}
|
||||
|
||||
interface StatusRowProps {
  /** Text shown on the left side of the row. */
  label: string;
  /** Selects the green check (truthy) or red cross (falsy) presentation. */
  isEnabled?: boolean;
  /** Status text shown next to the icon; falls back to "Installed" / "Not Installed". */
  description?: string;
  /** Plain value rendered instead of the icon/status pair when `showIcon` is false. */
  value?: string | number;
  /** When false, only `value` is rendered in muted text. Defaults to true. */
  showIcon?: boolean;
}

/**
 * One label/value line of the GPU status panels.
 *
 * With `showIcon` (the default) it renders a coloured icon plus a status
 * text; otherwise it renders `value` as muted text.
 */
export function StatusRow({
  label,
  isEnabled,
  description,
  value,
  showIcon = true,
}: StatusRowProps) {
  const Icon = isEnabled ? CheckCircle2 : XCircle;
  const tone = isEnabled ? "text-green-500" : "text-red-500";
  const statusText = description || (isEnabled ? "Installed" : "Not Installed");

  const content = showIcon ? (
    <>
      <Icon className={`size-4 ${tone}`} />
      <span className={`text-sm ${tone}`}>{statusText}</span>
    </>
  ) : (
    <span className="text-sm text-muted-foreground">{value}</span>
  );

  return (
    <div className="flex items-center justify-between">
      <span className="text-sm">{label}</span>
      <div className="flex items-center gap-2">{content}</div>
    </div>
  );
}
|
||||
@@ -32,6 +32,7 @@ import Link from "next/link";
|
||||
import { useState } from "react";
|
||||
import { toast } from "sonner";
|
||||
import { ShowDeployment } from "../../application/deployments/show-deployment";
|
||||
import { GPUSupport } from "./gpu-support";
|
||||
|
||||
interface Props {
|
||||
serverId: string;
|
||||
@@ -89,9 +90,10 @@ export const SetupServer = ({ serverId }: Props) => {
|
||||
) : (
|
||||
<div id="hook-form-add-gitlab" className="grid w-full gap-1">
|
||||
<Tabs defaultValue="ssh-keys">
|
||||
<TabsList className="grid grid-cols-2 w-[400px]">
|
||||
<TabsList className="grid grid-cols-3 w-[400px]">
|
||||
<TabsTrigger value="ssh-keys">SSH Keys</TabsTrigger>
|
||||
<TabsTrigger value="deployments">Deployments</TabsTrigger>
|
||||
<TabsTrigger value="gpu-setup">GPU Setup</TabsTrigger>
|
||||
</TabsList>
|
||||
<TabsContent
|
||||
value="ssh-keys"
|
||||
@@ -291,6 +293,14 @@ export const SetupServer = ({ serverId }: Props) => {
|
||||
</div>
|
||||
</CardContent>
|
||||
</TabsContent>
|
||||
<TabsContent
|
||||
value="gpu-setup"
|
||||
className="outline-none ring-0 focus-visible:ring-0 focus-visible:ring-offset-0"
|
||||
>
|
||||
<div className="flex flex-col gap-2 text-sm text-muted-foreground pt-3">
|
||||
<GPUSupport serverId={serverId} />
|
||||
</div>
|
||||
</TabsContent>
|
||||
</Tabs>
|
||||
</div>
|
||||
)}
|
||||
|
||||
153
apps/dokploy/public/templates/blender.svg
Normal file
153
apps/dokploy/public/templates/blender.svg
Normal file
@@ -0,0 +1,153 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
width="11.567343mm"
|
||||
height="15.032981mm"
|
||||
viewBox="0 0 11.567343 15.03298"
|
||||
version="1.1"
|
||||
id="svg8"
|
||||
sodipodi:docname="community_logo_black.svg">
|
||||
<defs
|
||||
id="defs2" />
|
||||
<sodipodi:namedview
|
||||
id="base"
|
||||
pagecolor="#c8c8c8"
|
||||
bordercolor="#666666"
|
||||
borderopacity="1.0"
|
||||
showgrid="false"
|
||||
showguides="true"
|
||||
borderlayer="true"
|
||||
fit-margin-top="1"
|
||||
fit-margin-left="1"
|
||||
fit-margin-right="1"
|
||||
fit-margin-bottom="1"/>
|
||||
<metadata
|
||||
id="metadata5">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:title></dc:title>
|
||||
</cc:Work>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
id="layer1"
|
||||
transform="translate(-115.93625,-150.07138)">
|
||||
<g
|
||||
transform="translate(-3.8788837,214.53487)"
|
||||
id="g1369">
|
||||
<path
|
||||
style="opacity:1;fill:#000000;fill-opacity:0.07058824;stroke:none;stroke-width:0.31555739;stroke-miterlimit:1.41420996;stroke-dasharray:none;stroke-opacity:1;paint-order:markers stroke fill"
|
||||
d="m 121.59341,-62.933898 c -0.43151,0 -0.77882,0.347312 -0.77882,0.778817 v 7.918777 c 0,0.04214 0.004,0.08316 0.0106,0.12345 7.5e-4,0.0053 10e-4,0.01041 0.002,0.01567 0.001,0.0073 0.002,0.01466 0.004,0.02186 0.10284,0.693169 0.73757,1.119278 2.19888,2.190555 2.64127,1.936306 2.45943,1.935512 5.11716,0.02186 1.68877,-1.215962 2.28048,-1.590346 2.23197,-2.501308 v -7.790874 c 0,-0.431505 -0.34751,-0.778817 -0.77902,-0.778817 z"
|
||||
id="path1373"/>
|
||||
<path
|
||||
id="path1323"
|
||||
d="m 121.59341,-63.463065 c -0.43151,0 -0.77882,0.347312 -0.77882,0.778817 v 7.918777 c 0,0.04214 0.004,0.08316 0.0106,0.12345 7.5e-4,0.0053 10e-4,0.01041 0.002,0.01567 0.001,0.0073 0.002,0.01466 0.004,0.02186 0.10284,0.693169 0.73757,1.119278 2.19888,2.190555 2.64127,1.936306 2.45943,1.935512 5.11716,0.02186 1.68877,-1.215962 2.28048,-1.590346 2.23197,-2.501308 v -7.790874 c 0,-0.431505 -0.34751,-0.778817 -0.77902,-0.778817 z"
|
||||
style="opacity:1;fill:#363636;fill-opacity:1;stroke:none;stroke-width:0.31555739;stroke-miterlimit:1.41420996;stroke-dasharray:none;stroke-opacity:1;paint-order:markers stroke fill" />
|
||||
<g
|
||||
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996"
|
||||
id="g1353"
|
||||
transform="matrix(0.02054188,0,0,0.02054188,97.15326,-61.563495)">
|
||||
<g
|
||||
id="g1327"
|
||||
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||
<path
|
||||
id="path1325"
|
||||
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||
d="m 364.467,-333.746 c 0.171,-1.908 1.646,-3.118 3.899,-3.118 2.256,0 3.73,1.21 3.901,3.118 z m 7.569,4.711 c -0.577,1.414 -1.937,2.251 -3.784,2.251 -2.313,0 -3.87,-1.444 -3.933,-3.725 h 13.297 c 0,-0.237 0,-0.435 0,-0.671 0,-5.714 -3.354,-8.925 -9.364,-8.925 -5.836,0 -9.365,3.241 -9.365,8.324 0,5.114 3.584,8.35 9.365,8.35 3.469,0 6.159,-1.189 7.817,-3.279 z"/>
|
||||
</g>
|
||||
<g
|
||||
id="g1331"
|
||||
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||
<path
|
||||
id="path1329"
|
||||
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||
d="m 305.468,-333.737 c 0.176,-1.908 1.651,-3.118 3.906,-3.118 2.252,0 3.726,1.21 3.899,3.118 z m 7.574,4.711 c -0.578,1.418 -1.937,2.255 -3.788,2.255 -2.309,0 -3.87,-1.448 -3.931,-3.73 h 13.294 c 0,-0.234 0,-0.431 0,-0.667 0,-5.717 -3.353,-8.929 -9.363,-8.929 -5.839,0 -9.361,3.242 -9.361,8.325 0,5.114 3.582,8.35 9.361,8.35 3.468,0 6.16,-1.185 7.821,-3.278 z"/>
|
||||
</g>
|
||||
<g
|
||||
id="g1335"
|
||||
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||
<rect
|
||||
id="rect1333"
|
||||
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||
height="19.617001"
|
||||
width="4.7950001"
|
||||
y="-343.56"
|
||||
x="293.90701" />
|
||||
</g>
|
||||
<g
|
||||
id="g1339"
|
||||
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||
<path
|
||||
id="path1337"
|
||||
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||
d="m 319.81,-338.348 h 4.822 v 1.168 c 1.707,-1.822 3.757,-2.743 6.069,-2.743 2.663,0 4.679,0.921 5.72,2.489 0.869,1.295 0.926,2.858 0.926,4.912 v 8.579 h -4.829 v -7.538 c 0,-3.128 -0.629,-4.572 -3.375,-4.572 -2.775,0 -4.511,1.653 -4.511,4.428 v 7.682 h -4.822 z"/>
|
||||
</g>
|
||||
<g
|
||||
id="g1343"
|
||||
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||
<path
|
||||
id="path1341"
|
||||
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||
d="m 352.876,-331.538 c 0,2.685 -1.794,4.446 -4.57,4.446 -2.778,0 -4.572,-1.701 -4.572,-4.415 0,-2.754 1.77,-4.454 4.572,-4.454 2.776,0 4.57,1.73 4.57,4.423 z m 0,-6.157 c -1.219,-1.307 -2.983,-2.024 -5.435,-2.024 -5.29,0 -8.902,3.262 -8.902,8.151 0,4.793 3.587,8.146 8.815,8.146 2.397,0 4.157,-0.606 5.522,-1.965 v 1.444 h 4.825 v -20.861 l -4.825,1.244 z"/>
|
||||
</g>
|
||||
<g
|
||||
id="g1347"
|
||||
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||
<path
|
||||
id="path1345"
|
||||
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||
d="m 282.947,-335.961 c 2.804,0 4.567,1.7 4.567,4.454 0,2.714 -1.791,4.415 -4.567,4.415 -2.774,0 -4.566,-1.761 -4.566,-4.446 0,-2.693 1.792,-4.423 4.566,-4.423 z m -4.566,-7.599 -4.827,-1.244 v 20.861 h 4.827 v -1.444 c 1.358,1.359 3.121,1.965 5.52,1.965 5.231,0 8.813,-3.353 8.813,-8.146 0,-4.889 -3.613,-8.151 -8.9,-8.151 -2.457,0 -4.22,0.717 -5.433,2.024 z"/>
|
||||
</g>
|
||||
<g
|
||||
id="g1351"
|
||||
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||
<path
|
||||
id="path1349"
|
||||
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||
d="m 378.806,-323.943 v -14.405 h 4.825 v 0.89 c 1.445,-1.74 2.974,-2.606 4.713,-2.606 0.345,0 0.779,0.056 1.356,0.113 v 4.107 c -0.465,-0.061 -0.983,-0.061 -1.533,-0.061 -2.805,0 -4.536,1.85 -4.536,4.996 v 6.966 z"/>
|
||||
</g>
|
||||
</g>
|
||||
<g
|
||||
transform="matrix(0.04039667,0,0,0.04039667,81.604348,-55.892386)"
|
||||
style="clip-rule:evenodd;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996"
|
||||
id="g1367">
|
||||
<g
|
||||
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||
id="g1361"
|
||||
style="fill:#ffffff;fill-opacity:1">
|
||||
<path
|
||||
d="m 243.13,-333.715 c 0.106,-1.891 1.032,-3.557 2.429,-4.738 1.37,-1.16 3.214,-1.869 5.226,-1.869 2.01,0 3.854,0.709 5.225,1.869 1.396,1.181 2.322,2.847 2.429,4.736 0.106,1.943 -0.675,3.748 -2.045,5.086 -1.397,1.361 -3.384,2.215 -5.609,2.215 -2.225,0 -4.216,-0.854 -5.612,-2.215 -1.371,-1.338 -2.15,-3.143 -2.043,-5.084 z"
|
||||
style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero"
|
||||
id="path1359" />
|
||||
</g>
|
||||
<g
|
||||
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||
id="g1365"
|
||||
style="fill:#ffffff;fill-opacity:1">
|
||||
<path
|
||||
d="m 230.94,-329.894 c 0.013,0.74 0.249,2.178 0.603,3.301 0.744,2.377 2.006,4.576 3.762,6.514 1.802,1.992 4.021,3.592 6.584,4.728 2.694,1.193 5.613,1.801 8.645,1.796 3.027,-0.004 5.946,-0.624 8.64,-1.826 2.563,-1.147 4.78,-2.754 6.579,-4.747 1.755,-1.946 3.015,-4.149 3.761,-6.526 0.375,-1.201 0.612,-2.42 0.707,-3.643 0.093,-1.205 0.054,-2.412 -0.117,-3.618 -0.334,-2.35 -1.147,-4.555 -2.399,-6.565 -1.145,-1.847 -2.621,-3.464 -4.376,-4.825 l 0.004,-0.003 -17.711,-13.599 c -0.016,-0.012 -0.029,-0.025 -0.046,-0.036 -1.162,-0.892 -3.116,-0.889 -4.394,0.005 -1.292,0.904 -1.44,2.399 -0.29,3.342 l -0.005,0.005 7.387,6.007 -22.515,0.024 c -0.011,0 -0.022,0 -0.03,0 -1.861,0.002 -3.65,1.223 -4.004,2.766 -0.364,1.572 0.9,2.876 2.835,2.883 l -0.003,0.007 11.412,-0.022 -20.364,15.631 c -0.026,0.019 -0.054,0.039 -0.078,0.058 -1.921,1.471 -2.542,3.917 -1.332,5.465 1.228,1.574 3.839,1.577 5.78,0.009 l 11.114,-9.096 c 0,0 -0.162,1.228 -0.149,1.965 z m 28.559,4.112 c -2.29,2.333 -5.496,3.656 -8.965,3.663 -3.474,0.006 -6.68,-1.305 -8.97,-3.634 -1.119,-1.135 -1.941,-2.441 -2.448,-3.832 -0.497,-1.367 -0.69,-2.818 -0.562,-4.282 0.121,-1.431 0.547,-2.796 1.227,-4.031 0.668,-1.214 1.588,-2.311 2.724,-3.239 2.226,-1.814 5.06,-2.796 8.024,-2.8 2.967,-0.004 5.799,0.969 8.027,2.777 1.134,0.924 2.053,2.017 2.721,3.229 0.683,1.234 1.106,2.594 1.232,4.029 0.126,1.462 -0.067,2.911 -0.564,4.279 -0.508,1.395 -1.327,2.701 -2.446,3.841 z"
|
||||
style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero"
|
||||
id="path1363" />
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 10 KiB |
@@ -52,6 +52,7 @@ import {
|
||||
writeMainConfig,
|
||||
writeTraefikConfigInPath,
|
||||
} from "@dokploy/server";
|
||||
import { checkGPUStatus, setupGPUSupport } from "@dokploy/server";
|
||||
import { generateOpenApiDocument } from "@dokploy/trpc-openapi";
|
||||
import { TRPCError } from "@trpc/server";
|
||||
import { sql } from "drizzle-orm";
|
||||
@@ -657,6 +658,54 @@ export const settingsRouter = createTRPCRouter({
|
||||
}
|
||||
return { status: "not_cloud" };
|
||||
}),
|
||||
/**
 * Enables GPU support on the target server by delegating to
 * `setupGPUSupport`.
 *
 * Input: optional `serverId`. On cloud deployments (`IS_CLOUD`) a server id
 * is mandatory and its absence is reported as a BAD_REQUEST so the client
 * sees a validation error rather than an internal server error.
 * Returns `{ success: true }`; any failure from `setupGPUSupport` is logged
 * and rethrown unchanged.
 */
setupGPU: adminProcedure
  .input(
    z.object({
      serverId: z.string().optional(),
    }),
  )
  .mutation(async ({ input }) => {
    if (IS_CLOUD && !input.serverId) {
      throw new TRPCError({
        code: "BAD_REQUEST",
        message: "Select a server to enable the GPU Setup",
      });
    }

    try {
      await setupGPUSupport(input.serverId);
      return { success: true };
    } catch (error) {
      console.error("GPU Setup Error:", error);
      throw error;
    }
  }),
|
||||
/**
 * Reports the GPU/driver/runtime/swarm state of the target server.
 *
 * Input: optional `serverId`. On cloud deployments (`IS_CLOUD`) without a
 * server id nothing is probed and an all-negative status is returned.
 * Otherwise the result of `checkGPUStatus` is returned; a failure is
 * surfaced as an INTERNAL_SERVER_ERROR that keeps the original error as
 * `cause`.
 */
checkGPUStatus: adminProcedure
  .input(
    z.object({
      serverId: z.string().optional(),
    }),
  )
  .query(async ({ input }) => {
    if (IS_CLOUD && !input.serverId) {
      return {
        driverInstalled: false,
        driverVersion: undefined,
        gpuModel: undefined,
        runtimeInstalled: false,
        runtimeConfigured: false,
        // Boolean like every other flag, matching what checkGPUStatus returns.
        cudaSupport: false,
        cudaVersion: undefined,
        memoryInfo: undefined,
        availableGPUs: 0,
        swarmEnabled: false,
        gpuResources: 0,
      };
    }

    try {
      return await checkGPUStatus(input.serverId || "");
    } catch (error) {
      throw new TRPCError({
        code: "INTERNAL_SERVER_ERROR",
        message: "Failed to check GPU status",
        cause: error,
      });
    }
  }),
|
||||
});
|
||||
// {
|
||||
// "Parallelism": 1,
|
||||
|
||||
@@ -21,7 +21,6 @@ import {
|
||||
import type { Session, User } from "lucia";
|
||||
import superjson from "superjson";
|
||||
import { ZodError } from "zod";
|
||||
|
||||
/**
|
||||
* 1. CONTEXT
|
||||
*
|
||||
|
||||
26
apps/dokploy/templates/blender/docker-compose.yml
Normal file
26
apps/dokploy/templates/blender/docker-compose.yml
Normal file
@@ -0,0 +1,26 @@
|
||||
# Blender (linuxserver.io image) with NVIDIA GPU access.
version: "3.8"

services:
  blender:
    image: lscr.io/linuxserver/blender:latest
    # Run the container with the NVIDIA container runtime.
    runtime: nvidia
    deploy:
      resources:
        reservations:
          # Reserve every NVIDIA GPU on the host for this service.
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=all
      - PUID=1000
      - PGID=1000
      - TZ=Etc/UTC
      - SUBFOLDER=/ #optional
    ports:
      - 3000
      - 3001
    restart: unless-stopped
    shm_size: 1gb
|
||||
34
apps/dokploy/templates/blender/index.ts
Normal file
34
apps/dokploy/templates/blender/index.ts
Normal file
@@ -0,0 +1,34 @@
|
||||
import {
|
||||
type DomainSchema,
|
||||
type Schema,
|
||||
type Template,
|
||||
generateHash,
|
||||
generateRandomDomain,
|
||||
} from "../utils";
|
||||
|
||||
/**
 * Builds the Blender template: one generated domain routed to the
 * `blender` service on port 3000, plus the container's default
 * environment (user/group ids, timezone, sub-folder and NVIDIA visibility
 * settings).
 *
 * @param schema Template schema forwarded to `generateRandomDomain`.
 * @returns The environment lines and domain mapping for the template.
 */
export function generate(schema: Schema): Template {
  // The previously computed service hash was never read, so it is not generated.
  const mainDomain = generateRandomDomain(schema);

  const domains: DomainSchema[] = [
    {
      host: mainDomain,
      port: 3000,
      serviceName: "blender",
    },
  ];

  const envs = [
    "PUID=1000",
    "PGID=1000",
    "TZ=Etc/UTC",
    "SUBFOLDER=/",
    "NVIDIA_VISIBLE_DEVICES=all",
    "NVIDIA_DRIVER_CAPABILITIES=all",
  ];

  return {
    envs,
    domains,
  };
}
|
||||
@@ -702,4 +702,19 @@ export const templates: TemplateData[] = [
|
||||
tags: ["IA", "chat"],
|
||||
load: () => import("./lobe-chat/index").then((m) => m.generate),
|
||||
},
|
||||
{
|
||||
id: "blender",
|
||||
name: "Blender",
|
||||
version: "latest",
|
||||
description:
|
||||
"Blender is a free and open-source 3D creation suite. It supports the entire 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, video editing and 2D animation pipeline.",
|
||||
logo: "blender.svg",
|
||||
links: {
|
||||
github: "https://github.com/linuxserver/docker-blender",
|
||||
website: "https://www.blender.org/",
|
||||
docs: "https://docs.blender.org/",
|
||||
},
|
||||
tags: ["3d", "rendering", "animation"],
|
||||
load: () => import("./blender/index").then((m) => m.generate),
|
||||
},
|
||||
];
|
||||
|
||||
@@ -116,3 +116,4 @@ export * from "./monitoring/utilts";
|
||||
|
||||
export * from "./db/validations/domain";
|
||||
export * from "./db/validations/index";
|
||||
export * from "./utils/gpu-setup";
|
||||
|
||||
349
packages/server/src/utils/gpu-setup.ts
Normal file
349
packages/server/src/utils/gpu-setup.ts
Normal file
@@ -0,0 +1,349 @@
|
||||
import * as fs from "node:fs/promises";
|
||||
import { execAsync, sleep } from "../utils/process/execAsync";
|
||||
import { execAsyncRemote } from "../utils/process/execAsync";
|
||||
|
||||
/** Aggregated result of all GPU-related probes for one host. */
interface GPUInfo {
  /** True when the driver probe returned a version. */
  driverInstalled: boolean;
  driverVersion?: string;
  gpuModel?: string;
  /** True when the `nvidia-container-runtime` binary was found. */
  runtimeInstalled: boolean;
  /** True when Docker lists the nvidia runtime and uses it as default. */
  runtimeConfigured: boolean;
  cudaSupport: boolean;
  cudaVersion?: string;
  memoryInfo?: string;
  availableGPUs: number;
  /** True when the swarm node advertises a GPU generic resource. */
  swarmEnabled: boolean;
  gpuResources: number;
}

/**
 * Runs every GPU probe concurrently and merges their partial results.
 *
 * @param serverId Forwarded to each probe; the probes run their commands
 *   remotely when it is truthy and locally otherwise.
 * @returns The merged status, or an all-negative status if any probe rejects.
 */
export async function checkGPUStatus(serverId?: string): Promise<GPUInfo> {
  const probes = [
    checkGpuDriver(serverId),
    checkRuntime(serverId),
    checkSwarmResources(serverId),
    checkGpuInfo(serverId),
    checkCudaSupport(serverId),
  ] as const;

  try {
    const [driver, runtime, swarm, gpu, cuda] = await Promise.all(probes);

    return {
      ...driver,
      ...runtime,
      ...swarm,
      ...gpu,
      ...cuda,
    };
  } catch (error) {
    console.error("Error in checkGPUStatus:", error);

    const nothingDetected: GPUInfo = {
      driverInstalled: false,
      driverVersion: undefined,
      runtimeInstalled: false,
      runtimeConfigured: false,
      cudaSupport: false,
      cudaVersion: undefined,
      gpuModel: undefined,
      memoryInfo: undefined,
      availableGPUs: 0,
      swarmEnabled: false,
      gpuResources: 0,
    };
    return nothingDetected;
  }
}
|
||||
|
||||
/**
 * Probes the NVIDIA driver through `nvidia-smi`.
 *
 * @param serverId When truthy the commands run via `execAsyncRemote` on that
 *   server, otherwise locally via `execAsync`.
 * @returns The driver version (first reported row), whether a driver was
 *   found, and the number of GPUs. Any command failure is logged at debug
 *   level and reported as "no driver".
 */
const checkGpuDriver = async (serverId?: string) => {
  let driverVersion: string | undefined;
  let driverInstalled = false;
  let availableGPUs = 0;

  const run = (command: string) =>
    serverId ? execAsyncRemote(serverId, command) : execAsync(command);

  try {
    const { stdout: nvidiaSmi } = await run(
      "nvidia-smi --query-gpu=driver_version --format=csv,noheader",
    );

    // The query prints one row per GPU; every row carries the same driver
    // version, so only the first non-empty row is kept.
    driverVersion = nvidiaSmi
      .split("\n")
      .map((row) => row.trim())
      .find((row) => row.length > 0);

    if (driverVersion) {
      driverInstalled = true;
      const { stdout: gpuCount } = await run(
        "nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l",
      );

      const parsedCount = Number.parseInt(gpuCount.trim(), 10);
      // Never leak NaN into the status object.
      availableGPUs = Number.isNaN(parsedCount) ? 0 : parsedCount;
    }
  } catch (error) {
    console.debug("GPU driver check:", error);
  }

  return { driverVersion, driverInstalled, availableGPUs };
};
|
||||
|
||||
/**
 * Probes the NVIDIA container runtime.
 *
 * Two independent checks are made, each with its own debug-logged failure
 * path so one failing does not hide the other:
 *  1. `runtimeInstalled` - `command -v nvidia-container-runtime` printed a path.
 *  2. `runtimeConfigured` - `docker info` lists an `nvidia` runtime AND
 *     reports it as the default runtime.
 *
 * @param serverId When truthy the commands run via `execAsyncRemote` on that
 *   server, otherwise locally via `execAsync`.
 */
const checkRuntime = async (serverId?: string) => {
  let runtimeInstalled = false;
  let runtimeConfigured = false;

  const run = (command: string) =>
    serverId ? execAsyncRemote(serverId, command) : execAsync(command);

  // First check: Is nvidia-container-runtime installed?
  try {
    const { stdout } = await run("command -v nvidia-container-runtime");
    runtimeInstalled = !!stdout.trim();
  } catch (error) {
    console.debug("Runtime binary check:", error);
  }

  // Second check: Is it configured in Docker?
  try {
    const [{ stdout: runtimeInfo }, { stdout: defaultRuntime }] =
      await Promise.all([
        run('docker info --format "{{json .Runtimes}}"'),
        run('docker info --format "{{.DefaultRuntime}}"'),
      ]);

    const runtimes: unknown = JSON.parse(runtimeInfo);
    // Guard the `in` operator: it throws on null and on primitives.
    const hasNvidiaRuntime =
      typeof runtimes === "object" && runtimes !== null && "nvidia" in runtimes;
    const isDefaultRuntime = defaultRuntime.trim() === "nvidia";

    // Only set runtimeConfigured if both conditions are met
    runtimeConfigured = hasNvidiaRuntime && isDefaultRuntime;
  } catch (error) {
    console.debug("Runtime configuration check:", error);
  }

  return { runtimeInstalled, runtimeConfigured };
};
|
||||
|
||||
/**
 * Reads the generic resources advertised by the current swarm node and
 * reports whether a GPU resource is among them.
 *
 * @param serverId When truthy the command runs via `execAsyncRemote` on that
 *   server, otherwise locally via `execAsync`.
 * @returns `swarmEnabled` plus the `Value` of the first
 *   `DiscreteResourceSpec` whose `Kind` is "GPU" or "gpu". Command or parse
 *   failures are logged at debug level and reported as "not enabled".
 */
const checkSwarmResources = async (serverId?: string) => {
  let swarmEnabled = false;
  let gpuResources = 0;

  try {
    const nodeCommand =
      "docker node inspect self --format '{{json .Description.Resources.GenericResources}}'";
    const { stdout: resources } = serverId
      ? await execAsyncRemote(serverId, nodeCommand)
      : await execAsync(nodeCommand);

    // stdout ends with a newline, so compare the trimmed text against "null".
    const payload = resources ? resources.trim() : "";

    if (payload && payload !== "null") {
      const genericResources: unknown = JSON.parse(payload);

      // Anything other than a list carries no resources to inspect.
      if (Array.isArray(genericResources)) {
        for (const resource of genericResources) {
          const spec = resource?.DiscreteResourceSpec;
          if (spec && (spec.Kind === "GPU" || spec.Kind === "gpu")) {
            gpuResources = spec.Value;
            swarmEnabled = true;
            break;
          }
        }
      }
    }
  } catch (error) {
    console.debug("Swarm resource check:", error);
  }

  return { swarmEnabled, gpuResources };
};
|
||||
|
||||
/**
 * Queries `nvidia-smi` for the GPU model name and total memory.
 *
 * @param serverId - When set, the command runs on that server through
 *   `execAsyncRemote`; otherwise it runs locally through `execAsync`.
 * @returns `gpuModel` and `memoryInfo` taken from the first row of the CSV
 *   output. Both are `undefined` when the command fails or prints nothing;
 *   failures are only logged with `console.debug`.
 */
const checkGpuInfo = async (serverId?: string) => {
	let gpuModel: string | undefined;
	let memoryInfo: string | undefined;

	try {
		const gpuInfoCommand =
			"nvidia-smi --query-gpu=gpu_name,memory.total --format=csv,noheader";
		const { stdout: gpuInfo } = serverId
			? await execAsyncRemote(serverId, gpuInfoCommand)
			: await execAsync(gpuInfoCommand);

		// The CSV output has one row per GPU. Splitting the whole output on ","
		// would glue the next row's model name onto the memory column, so only
		// the first non-empty row is parsed.
		const firstRow = gpuInfo
			.split("\n")
			.map((row: string) => row.trim())
			.find((row: string) => row.length > 0);

		if (firstRow) {
			const [model, memory] = firstRow
				.split(",")
				.map((field: string) => field.trim());
			gpuModel = model || undefined;
			memoryInfo = memory || undefined;
		}
	} catch (error) {
		console.debug("GPU info check:", error);
	}

	return { gpuModel, memoryInfo };
};
|
||||
|
||||
/**
 * Detects the CUDA version reported by `nvidia-smi -q`.
 *
 * @param serverId - When set, the command runs on that server through
 *   `execAsyncRemote`; otherwise it runs locally through `execAsync`.
 * @returns `cudaVersion` is the version string captured from the
 *   "CUDA Version" line (or `undefined`), and `cudaSupport` is `true` exactly
 *   when a version was captured. Command failures are only logged with
 *   `console.debug`.
 */
const checkCudaSupport = async (serverId?: string) => {
	const versionPattern = /CUDA Version\s*:\s*([\d\.]+)/;
	let cudaVersion: string | undefined;
	let cudaSupport = false;

	try {
		const command = 'nvidia-smi -q | grep "CUDA Version"';
		const result = serverId
			? await execAsyncRemote(serverId, command)
			: await execAsync(command);

		const versionMatch = result.stdout.match(versionPattern);
		if (versionMatch) {
			cudaVersion = versionMatch[1];
		}
		cudaSupport = Boolean(cudaVersion);
	} catch (error) {
		console.debug("CUDA support check:", error);
	}

	return { cudaVersion, cudaSupport };
};
|
||||
|
||||
/**
 * Configures Docker Swarm GPU support on the local machine or on a remote
 * server.
 *
 * Steps, in order: check the current GPU status and validate prerequisites,
 * resolve the Swarm node ID, build the Docker daemon configuration, apply it
 * (remote or local), wait for Docker to restart, label the node, then wait
 * again and verify the result.
 *
 * @param serverId - Remote server to configure; when omitted the local
 *   machine is configured.
 * @throws Error with a "Sudo access required" message when the underlying
 *   failure mentions "password is required"; any other error is rethrown
 *   unchanged.
 */
export async function setupGPUSupport(serverId?: string): Promise<void> {
	try {
		// Status check and validation; stop early when validation says there
		// is nothing left to do.
		const status = await checkGPUStatus(serverId);
		if (!(await validatePrerequisites(status))) {
			return;
		}

		const swarmNodeId = await getNodeId(serverId);
		const dockerDaemonConfig = createDaemonConfig(status.availableGPUs);

		// Apply the configuration on the matching environment.
		await (serverId
			? setupRemoteServer(serverId, dockerDaemonConfig)
			: setupLocalServer(dockerDaemonConfig));

		// Wait for Docker restart before touching the node again.
		await sleep(10000);
		await addGpuLabel(swarmNodeId, serverId);

		// Short pause before the final verification.
		await sleep(5000);
		await verifySetup(swarmNodeId, serverId);
	} catch (error) {
		const needsSudo =
			error instanceof Error &&
			error.message.includes("password is required");

		throw needsSudo
			? new Error(
					"Sudo access required. Please run with appropriate permissions.",
				)
			: error;
	}
}
|
||||
|
||||
/**
 * Decides whether GPU setup should proceed for the given status snapshot.
 *
 * @param initialStatus - Status gathered before any change is made.
 * @returns `false` when both `swarmEnabled` and `runtimeConfigured` are
 *   already set (nothing to do), `true` otherwise.
 * @throws Error when the NVIDIA drivers or the NVIDIA container runtime are
 *   not installed.
 */
const validatePrerequisites = async (initialStatus: GPUInfo) => {
	const { driverInstalled, runtimeInstalled, swarmEnabled, runtimeConfigured } =
		initialStatus;

	if (!driverInstalled) {
		throw new Error(
			"NVIDIA drivers not installed. Please install appropriate NVIDIA drivers first.",
		);
	}

	if (!runtimeInstalled) {
		throw new Error(
			"NVIDIA Container Runtime not installed. Please install nvidia-container-runtime first.",
		);
	}

	const alreadyConfigured = swarmEnabled && runtimeConfigured;
	return !alreadyConfigured;
};
|
||||
|
||||
/**
 * Resolves the Swarm node ID reported by `docker info`.
 *
 * @param serverId - When set, the command runs on that server through
 *   `execAsyncRemote`; otherwise it runs locally through `execAsync`.
 * @returns The trimmed node ID.
 * @throws Error when the reported node ID is empty.
 */
const getNodeId = async (serverId?: string) => {
	const command = 'docker info --format "{{.Swarm.NodeID}}"';
	const result = serverId
		? await execAsyncRemote(serverId, command)
		: await execAsync(command);

	const swarmNodeId = result.stdout.trim();
	if (swarmNodeId) {
		return swarmNodeId;
	}

	throw new Error("Setup Server before enabling GPU support");
};
|
||||
|
||||
/**
 * Builds the Docker daemon configuration object used for GPU support.
 *
 * @param availableGPUs - Number interpolated into the `GPU=<n>` entry of
 *   `node-generic-resources`.
 * @returns An object that registers the `nvidia` runtime, makes it the
 *   default runtime, and advertises the GPU count as a generic resource.
 */
const createDaemonConfig = (availableGPUs: number) => {
	const nvidiaRuntime = {
		path: "nvidia-container-runtime",
		runtimeArgs: [],
	};

	return {
		runtimes: { nvidia: nvidiaRuntime },
		"default-runtime": "nvidia",
		"node-generic-resources": [`GPU=${availableGPUs}`],
	};
};
|
||||
|
||||
/**
 * Applies the GPU configuration on a remote server and restarts Docker.
 *
 * The commands are chained with `&&` and sent as a single remote invocation:
 * verify non-interactive sudo, write `/etc/docker/daemon.json`, make sure the
 * NVIDIA container runtime config exists, set its `swarm-resource` entry,
 * then reload systemd and restart Docker.
 *
 * NOTE(review): `/etc/docker/daemon.json` is replaced wholesale; any settings
 * already present in that file are not merged.
 *
 * @param serverId - Server the commands are executed on.
 * @param daemonConfig - JSON-serializable Docker daemon configuration.
 */
const setupRemoteServer = async (serverId: string, daemonConfig: unknown) => {
	const runtimeConfigPath = "/etc/nvidia-container-runtime/config.toml";

	// The JSON is embedded in a single-quoted shell string, so any single
	// quote inside it has to be closed, escaped and reopened.
	const daemonJson = JSON.stringify(daemonConfig, null, 2).replace(
		/'/g,
		"'\\''",
	);

	const setupCommands = [
		// Fails immediately when sudo would need to prompt for a password.
		"sudo -n true",
		`echo '${daemonJson}' | sudo tee /etc/docker/daemon.json`,
		"sudo mkdir -p /etc/nvidia-container-runtime",
		// `sed -i` fails on a missing file, which would abort the chain after
		// daemon.json was already rewritten; create the file first.
		`sudo touch ${runtimeConfigPath}`,
		`sudo sed -i "/swarm-resource/d" ${runtimeConfigPath}`,
		`echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" | sudo tee -a ${runtimeConfigPath}`,
		"sudo systemctl daemon-reload",
		"sudo systemctl restart docker",
	].join(" && ");

	await execAsyncRemote(serverId, setupCommands);
};
|
||||
|
||||
/**
 * Applies the GPU configuration on the local machine and restarts Docker.
 *
 * The daemon configuration is written to a temporary file, then a single
 * `pkexec` shell copies it to `/etc/docker/daemon.json`, makes sure the NVIDIA
 * container runtime config exists, sets its `swarm-resource` entry, reloads
 * systemd and restarts Docker. The temporary file is removed afterwards
 * whether or not the privileged step succeeded.
 *
 * NOTE(review): `/etc/docker/daemon.json` is replaced wholesale; any settings
 * already present in that file are not merged.
 *
 * @param daemonConfig - JSON-serializable Docker daemon configuration.
 */
const setupLocalServer = async (daemonConfig: unknown) => {
	const configFile = `/tmp/docker-daemon-${Date.now()}.json`;
	await fs.writeFile(configFile, JSON.stringify(daemonConfig, null, 2));

	// `touch` runs before `sed -i` because sed fails on a missing file, which
	// would abort the chain after daemon.json was already replaced.
	const setupCommand = `pkexec sh -c '
		cp ${configFile} /etc/docker/daemon.json &&
		mkdir -p /etc/nvidia-container-runtime &&
		touch /etc/nvidia-container-runtime/config.toml &&
		sed -i "/swarm-resource/d" /etc/nvidia-container-runtime/config.toml &&
		echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" >> /etc/nvidia-container-runtime/config.toml &&
		systemctl daemon-reload &&
		systemctl restart docker
	'`;

	try {
		await execAsync(setupCommand);
	} finally {
		// Best-effort cleanup: a failure to delete the temp file must not mask
		// the outcome of the privileged command.
		await fs.unlink(configFile).catch(() => undefined);
	}
};
|
||||
|
||||
/**
 * Adds the `gpu=true` label to a Swarm node.
 *
 * @param nodeId - Swarm node to label.
 * @param serverId - When set, the command runs on that server through
 *   `execAsyncRemote`; otherwise it runs locally through `execAsync`.
 */
const addGpuLabel = async (nodeId: string, serverId?: string) => {
	const command = `docker node update --label-add gpu=true ${nodeId}`;

	await (serverId
		? execAsyncRemote(serverId, command)
		: execAsync(command));
};
|
||||
|
||||
/**
 * Re-checks the GPU status after setup and fails when Swarm still does not
 * report GPU support.
 *
 * On failure a set of diagnostic commands is run (node inspect, GPU UUIDs,
 * daemon.json, NVIDIA runtime config) and their output is logged. Collecting
 * diagnostics is best-effort: each command runs regardless of the others, and
 * a failure to collect them never replaces the verification error.
 *
 * @param nodeId - Swarm node that was configured.
 * @param serverId - When set, commands run on that server through
 *   `execAsyncRemote`; otherwise they run locally through `execAsync`.
 * @returns The status returned by `checkGPUStatus` when `swarmEnabled` is set.
 * @throws Error "GPU support not detected in swarm after setup" otherwise.
 */
const verifySetup = async (nodeId: string, serverId?: string) => {
	const finalStatus = await checkGPUStatus(serverId);

	if (finalStatus.swarmEnabled) {
		return finalStatus;
	}

	// Each command runs in its own subshell with stderr folded into stdout and
	// a forced success status, so a missing file or an empty grep result does
	// not hide the output of the remaining commands.
	const diagnosticCommands = [
		`docker node inspect ${nodeId}`,
		'nvidia-smi -a | grep "GPU UUID"',
		"cat /etc/docker/daemon.json",
		"cat /etc/nvidia-container-runtime/config.toml",
	]
		.map((command) => `(${command}) 2>&1 || true`)
		.join("; ");

	try {
		const { stdout: diagnostics } = serverId
			? await execAsyncRemote(serverId, diagnosticCommands)
			: await execAsync(diagnosticCommands);

		console.error("Diagnostic Information:", diagnostics);
	} catch (diagnosticError) {
		console.error("Failed to collect GPU diagnostics:", diagnosticError);
	}

	throw new Error("GPU support not detected in swarm after setup");
};
|
||||
Reference in New Issue
Block a user