mirror of
https://github.com/Dokploy/dokploy
synced 2025-06-26 18:27:59 +00:00
Merge branch 'canary' into feat/nextcloud-aio-template
This commit is contained in:
@@ -13,6 +13,7 @@ import {
|
|||||||
import { api } from "@/utils/api";
|
import { api } from "@/utils/api";
|
||||||
import { toast } from "sonner";
|
import { toast } from "sonner";
|
||||||
import { ShowModalLogs } from "../../web-server/show-modal-logs";
|
import { ShowModalLogs } from "../../web-server/show-modal-logs";
|
||||||
|
import { GPUSupportModal } from "../gpu-support-modal";
|
||||||
|
|
||||||
export const ShowDokployActions = () => {
|
export const ShowDokployActions = () => {
|
||||||
const { mutateAsync: reloadServer, isLoading } =
|
const { mutateAsync: reloadServer, isLoading } =
|
||||||
@@ -45,6 +46,7 @@ export const ShowDokployActions = () => {
|
|||||||
<ShowModalLogs appName="dokploy">
|
<ShowModalLogs appName="dokploy">
|
||||||
<span>Watch logs</span>
|
<span>Watch logs</span>
|
||||||
</ShowModalLogs>
|
</ShowModalLogs>
|
||||||
|
<GPUSupportModal />
|
||||||
</DropdownMenuGroup>
|
</DropdownMenuGroup>
|
||||||
</DropdownMenuContent>
|
</DropdownMenuContent>
|
||||||
</DropdownMenu>
|
</DropdownMenu>
|
||||||
|
|||||||
@@ -0,0 +1,36 @@
|
|||||||
|
import {
|
||||||
|
Dialog,
|
||||||
|
DialogContent,
|
||||||
|
DialogHeader,
|
||||||
|
DialogTitle,
|
||||||
|
DialogTrigger,
|
||||||
|
} from "@/components/ui/dialog";
|
||||||
|
import { DropdownMenuItem } from "@/components/ui/dropdown-menu";
|
||||||
|
import { useState } from "react";
|
||||||
|
import { GPUSupport } from "./gpu-support";
|
||||||
|
|
||||||
|
/**
 * Dropdown menu entry that opens a dialog containing the GPU setup panel.
 *
 * The dialog's open state is controlled locally. The panel is rendered with
 * an empty `serverId`, which is still "defined" (so the status query is
 * enabled) but falsy (so the server-side checks use their local-command
 * branch instead of the remote one).
 */
export const GPUSupportModal = () => {
	const [open, setOpen] = useState(false);

	// Cancel the menu item's default select handling.
	// NOTE(review): presumably this keeps the dropdown from closing (and
	// unmounting the dialog trigger) when the item is chosen — confirm
	// against the dropdown-menu component's behavior.
	const cancelDefaultSelect = (event: Event) => {
		event.preventDefault();
	};

	const trigger = (
		<DialogTrigger asChild>
			<DropdownMenuItem
				className="w-full cursor-pointer"
				onSelect={cancelDefaultSelect}
			>
				<span>GPU Setup</span>
			</DropdownMenuItem>
		</DialogTrigger>
	);

	return (
		<Dialog open={open} onOpenChange={setOpen}>
			{trigger}
			<DialogContent className="sm:max-w-4xl overflow-y-auto max-h-screen">
				<DialogHeader>
					<DialogTitle className="flex items-center gap-2">
						Dokploy Server GPU Setup
					</DialogTitle>
				</DialogHeader>

				<GPUSupport serverId="" />
			</DialogContent>
		</Dialog>
	);
};
|
||||||
@@ -0,0 +1,282 @@
|
|||||||
|
import { AlertBlock } from "@/components/shared/alert-block";
|
||||||
|
import { DialogAction } from "@/components/shared/dialog-action";
|
||||||
|
import { Button } from "@/components/ui/button";
|
||||||
|
import {
|
||||||
|
Card,
|
||||||
|
CardContent,
|
||||||
|
CardDescription,
|
||||||
|
CardHeader,
|
||||||
|
CardTitle,
|
||||||
|
} from "@/components/ui/card";
|
||||||
|
import { api } from "@/utils/api";
|
||||||
|
import { TRPCClientError } from "@trpc/client";
|
||||||
|
import { CheckCircle2, Cpu, Loader2, RefreshCw, XCircle } from "lucide-react";
|
||||||
|
import { useEffect, useState } from "react";
|
||||||
|
import { toast } from "sonner";
|
||||||
|
|
||||||
|
interface GPUSupportProps {
	/**
	 * Target server. When `undefined` the status query stays disabled and the
	 * enable button is disabled; an empty string is treated as "defined".
	 */
	serverId?: string;
}

/**
 * Panel that shows the GPU prerequisites / Docker Swarm GPU status reported by
 * `settings.checkGPUStatus` and lets the user run `settings.setupGPU`.
 *
 * @param serverId - Server whose GPU status is queried and configured.
 */
export function GPUSupport({ serverId }: GPUSupportProps) {
	const [isLoading, setIsLoading] = useState(false);
	const [isRefreshing, setIsRefreshing] = useState(false);
	const utils = api.useContext();

	const {
		data: gpuStatus,
		isLoading: isChecking,
		refetch,
	} = api.settings.checkGPUStatus.useQuery(
		{ serverId },
		{
			enabled: serverId !== undefined,
		},
	);

	const setupGPU = api.settings.setupGPU.useMutation({
		onMutate: () => {
			setIsLoading(true);
		},
		onSuccess: async () => {
			toast.success("GPU support enabled successfully");
			setIsLoading(false);
			// Re-read the status so the panel reflects the new configuration.
			await utils.settings.checkGPUStatus.invalidate({ serverId });
		},
		onError: (error) => {
			toast.error(
				error.message ||
					"Failed to enable GPU support. Please check server logs.",
			);
			setIsLoading(false);
		},
	});

	/**
	 * Re-runs the status query once and reports failures to the user.
	 *
	 * A single `refetch` is enough to refresh the cached status; issuing an
	 * `invalidate` followed by a `refetch` would run the status check twice.
	 * `throwOnError` is required because `refetch` otherwise resolves with an
	 * error result instead of rejecting, which would make the catch below
	 * unreachable.
	 */
	const handleRefresh = async () => {
		setIsRefreshing(true);
		try {
			await refetch({ throwOnError: true });
		} catch {
			toast.error("Failed to refresh GPU status");
		} finally {
			setIsRefreshing(false);
		}
	};

	// Force a fresh status check once when the panel mounts.
	useEffect(() => {
		handleRefresh();
	}, []);

	/** Runs the GPU setup mutation for the selected server. */
	const handleEnableGPU = async () => {
		if (serverId === undefined) {
			toast.error("No server selected");
			return;
		}

		try {
			await setupGPU.mutateAsync({ serverId });
		} catch {
			// Error handling is done in mutation's onError
		}
	};

	return (
		<CardContent className="p-0">
			<div className="flex flex-col gap-4">
				<Card className="bg-background">
					<CardHeader className="flex flex-row items-center justify-between flex-wrap gap-2">
						<div className="flex flex-row gap-2 justify-between w-full items-end max-sm:flex-col">
							<div className="flex flex-col gap-1">
								<div className="flex items-center gap-2">
									<Cpu className="size-5" />
									<CardTitle className="text-xl">GPU Configuration</CardTitle>
								</div>
								<CardDescription>
									Configure and monitor GPU support
								</CardDescription>
							</div>
							<div className="flex items-center gap-2">
								<DialogAction
									title="Enable GPU Support?"
									description="This will enable GPU support for Docker Swarm on this server. Make sure you have the required hardware and drivers installed."
									onClick={handleEnableGPU}
								>
									<Button
										isLoading={isLoading}
										disabled={isLoading || serverId === undefined || isChecking}
									>
										{isLoading
											? "Enabling GPU..."
											: gpuStatus?.swarmEnabled
												? "Reconfigure GPU"
												: "Enable GPU"}
									</Button>
								</DialogAction>
								<Button
									size="icon"
									onClick={handleRefresh}
									disabled={isChecking || isRefreshing}
								>
									<RefreshCw
										className={`h-5 w-5 ${isChecking || isRefreshing ? "animate-spin" : ""}`}
									/>
								</Button>
							</div>
						</div>
					</CardHeader>

					<CardContent className="flex flex-col gap-4">
						<AlertBlock type="info">
							<div className="font-medium mb-2">System Requirements:</div>
							<ul className="list-disc list-inside text-sm space-y-1">
								<li>NVIDIA GPU hardware must be physically installed</li>
								<li>
									NVIDIA drivers must be installed and running (check with
									nvidia-smi)
								</li>
								<li>
									NVIDIA Container Runtime must be installed
									(nvidia-container-runtime)
								</li>
								<li>User must have sudo/administrative privileges</li>
								<li>System must support CUDA for GPU acceleration</li>
							</ul>
						</AlertBlock>

						{isChecking ? (
							<div className="flex items-center justify-center text-muted-foreground py-4">
								<Loader2 className="mr-2 h-4 w-4 animate-spin" />
								<span>Checking GPU status...</span>
							</div>
						) : (
							<div className="grid gap-4">
								{/* Prerequisites Section */}
								<div className="border rounded-lg p-4">
									<h3 className="text-lg font-semibold mb-1">Prerequisites</h3>
									<p className="text-sm text-muted-foreground mb-4">
										Shows all software checks and available hardware
									</p>
									<div className="grid gap-2.5">
										<StatusRow
											label="NVIDIA Driver"
											isEnabled={gpuStatus?.driverInstalled}
											description={
												gpuStatus?.driverVersion
													? `Installed (v${gpuStatus.driverVersion})`
													: "Not Installed"
											}
										/>
										<StatusRow
											label="GPU Model"
											value={gpuStatus?.gpuModel || "Not Detected"}
											showIcon={false}
										/>
										<StatusRow
											label="GPU Memory"
											value={gpuStatus?.memoryInfo || "Not Available"}
											showIcon={false}
										/>
										{/* `||` (not `??`) on purpose: it also turns a NaN count into 0. */}
										<StatusRow
											label="Available GPUs"
											value={gpuStatus?.availableGPUs || 0}
											showIcon={false}
										/>
										<StatusRow
											label="CUDA Support"
											isEnabled={gpuStatus?.cudaSupport}
											description={
												gpuStatus?.cudaVersion
													? `Available (v${gpuStatus.cudaVersion})`
													: "Not Available"
											}
										/>
										<StatusRow
											label="NVIDIA Container Runtime"
											isEnabled={gpuStatus?.runtimeInstalled}
											description={
												gpuStatus?.runtimeInstalled
													? "Installed"
													: "Not Installed"
											}
										/>
									</div>
								</div>

								{/* Configuration Status */}
								<div className="border rounded-lg p-4">
									<h3 className="text-lg font-semibold mb-1">
										Docker Swarm GPU Status
									</h3>
									<p className="text-sm text-muted-foreground mb-4">
										Shows the configuration state that changes with the Enable
										GPU
									</p>
									<div className="grid gap-2.5">
										<StatusRow
											label="Runtime Configuration"
											isEnabled={gpuStatus?.runtimeConfigured}
											description={
												gpuStatus?.runtimeConfigured
													? "Default Runtime"
													: "Not Default Runtime"
											}
										/>
										<StatusRow
											label="Swarm GPU Support"
											isEnabled={gpuStatus?.swarmEnabled}
											description={
												gpuStatus?.swarmEnabled
													? `Enabled (${gpuStatus.gpuResources} GPU${gpuStatus.gpuResources !== 1 ? "s" : ""})`
													: "Not Enabled"
											}
										/>
									</div>
								</div>
							</div>
						)}
					</CardContent>
				</Card>
			</div>
		</CardContent>
	);
}
|
||||||
|
|
||||||
|
interface StatusRowProps {
	/** Text shown on the left side of the row. */
	label: string;
	/** Selects the green check or red cross (icon mode only). */
	isEnabled?: boolean;
	/** Status text for icon mode; falls back to "Installed"/"Not Installed". */
	description?: string;
	/** Plain value rendered when `showIcon` is false. */
	value?: string | number;
	/** When true (default) render icon + status text, otherwise render `value`. */
	showIcon?: boolean;
}

/**
 * One label/value line of the GPU status panel.
 *
 * In icon mode the row shows a check or cross plus a coloured status text;
 * otherwise it shows `value` in muted text.
 */
export function StatusRow({
	label,
	isEnabled,
	description,
	value,
	showIcon = true,
}: StatusRowProps) {
	// Everything that depends on the enabled flag is resolved up front.
	const StateIcon = isEnabled ? CheckCircle2 : XCircle;
	const tone = isEnabled ? "text-green-500" : "text-red-500";
	const statusText = description || (isEnabled ? "Installed" : "Not Installed");

	const rightSide = showIcon ? (
		<>
			<StateIcon className={`size-4 ${tone}`} />
			<span className={`text-sm ${tone}`}>{statusText}</span>
		</>
	) : (
		<span className="text-sm text-muted-foreground">{value}</span>
	);

	return (
		<div className="flex items-center justify-between">
			<span className="text-sm">{label}</span>
			<div className="flex items-center gap-2">{rightSide}</div>
		</div>
	);
}
|
||||||
@@ -32,6 +32,7 @@ import Link from "next/link";
|
|||||||
import { useState } from "react";
|
import { useState } from "react";
|
||||||
import { toast } from "sonner";
|
import { toast } from "sonner";
|
||||||
import { ShowDeployment } from "../../application/deployments/show-deployment";
|
import { ShowDeployment } from "../../application/deployments/show-deployment";
|
||||||
|
import { GPUSupport } from "./gpu-support";
|
||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
serverId: string;
|
serverId: string;
|
||||||
@@ -89,9 +90,10 @@ export const SetupServer = ({ serverId }: Props) => {
|
|||||||
) : (
|
) : (
|
||||||
<div id="hook-form-add-gitlab" className="grid w-full gap-1">
|
<div id="hook-form-add-gitlab" className="grid w-full gap-1">
|
||||||
<Tabs defaultValue="ssh-keys">
|
<Tabs defaultValue="ssh-keys">
|
||||||
<TabsList className="grid grid-cols-2 w-[400px]">
|
<TabsList className="grid grid-cols-3 w-[400px]">
|
||||||
<TabsTrigger value="ssh-keys">SSH Keys</TabsTrigger>
|
<TabsTrigger value="ssh-keys">SSH Keys</TabsTrigger>
|
||||||
<TabsTrigger value="deployments">Deployments</TabsTrigger>
|
<TabsTrigger value="deployments">Deployments</TabsTrigger>
|
||||||
|
<TabsTrigger value="gpu-setup">GPU Setup</TabsTrigger>
|
||||||
</TabsList>
|
</TabsList>
|
||||||
<TabsContent
|
<TabsContent
|
||||||
value="ssh-keys"
|
value="ssh-keys"
|
||||||
@@ -291,6 +293,14 @@ export const SetupServer = ({ serverId }: Props) => {
|
|||||||
</div>
|
</div>
|
||||||
</CardContent>
|
</CardContent>
|
||||||
</TabsContent>
|
</TabsContent>
|
||||||
|
<TabsContent
|
||||||
|
value="gpu-setup"
|
||||||
|
className="outline-none ring-0 focus-visible:ring-0 focus-visible:ring-offset-0"
|
||||||
|
>
|
||||||
|
<div className="flex flex-col gap-2 text-sm text-muted-foreground pt-3">
|
||||||
|
<GPUSupport serverId={serverId} />
|
||||||
|
</div>
|
||||||
|
</TabsContent>
|
||||||
</Tabs>
|
</Tabs>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|||||||
153
apps/dokploy/public/templates/blender.svg
Normal file
153
apps/dokploy/public/templates/blender.svg
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||||
|
|
||||||
|
<svg
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
|
xmlns:cc="http://creativecommons.org/ns#"
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:svg="http://www.w3.org/2000/svg"
|
||||||
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||||
|
width="11.567343mm"
|
||||||
|
height="15.032981mm"
|
||||||
|
viewBox="0 0 11.567343 15.03298"
|
||||||
|
version="1.1"
|
||||||
|
id="svg8"
|
||||||
|
sodipodi:docname="community_logo_black.svg">
|
||||||
|
<defs
|
||||||
|
id="defs2" />
|
||||||
|
<sodipodi:namedview
|
||||||
|
id="base"
|
||||||
|
pagecolor="#c8c8c8"
|
||||||
|
bordercolor="#666666"
|
||||||
|
borderopacity="1.0"
|
||||||
|
showgrid="false"
|
||||||
|
showguides="true"
|
||||||
|
borderlayer="true"
|
||||||
|
fit-margin-top="1"
|
||||||
|
fit-margin-left="1"
|
||||||
|
fit-margin-right="1"
|
||||||
|
fit-margin-bottom="1"/>
|
||||||
|
<metadata
|
||||||
|
id="metadata5">
|
||||||
|
<rdf:RDF>
|
||||||
|
<cc:Work
|
||||||
|
rdf:about="">
|
||||||
|
<dc:format>image/svg+xml</dc:format>
|
||||||
|
<dc:type
|
||||||
|
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||||
|
<dc:title></dc:title>
|
||||||
|
</cc:Work>
|
||||||
|
</rdf:RDF>
|
||||||
|
</metadata>
|
||||||
|
<g
|
||||||
|
id="layer1"
|
||||||
|
transform="translate(-115.93625,-150.07138)">
|
||||||
|
<g
|
||||||
|
transform="translate(-3.8788837,214.53487)"
|
||||||
|
id="g1369">
|
||||||
|
<path
|
||||||
|
style="opacity:1;fill:#000000;fill-opacity:0.07058824;stroke:none;stroke-width:0.31555739;stroke-miterlimit:1.41420996;stroke-dasharray:none;stroke-opacity:1;paint-order:markers stroke fill"
|
||||||
|
d="m 121.59341,-62.933898 c -0.43151,0 -0.77882,0.347312 -0.77882,0.778817 v 7.918777 c 0,0.04214 0.004,0.08316 0.0106,0.12345 7.5e-4,0.0053 10e-4,0.01041 0.002,0.01567 0.001,0.0073 0.002,0.01466 0.004,0.02186 0.10284,0.693169 0.73757,1.119278 2.19888,2.190555 2.64127,1.936306 2.45943,1.935512 5.11716,0.02186 1.68877,-1.215962 2.28048,-1.590346 2.23197,-2.501308 v -7.790874 c 0,-0.431505 -0.34751,-0.778817 -0.77902,-0.778817 z"
|
||||||
|
id="path1373"/>
|
||||||
|
<path
|
||||||
|
id="path1323"
|
||||||
|
d="m 121.59341,-63.463065 c -0.43151,0 -0.77882,0.347312 -0.77882,0.778817 v 7.918777 c 0,0.04214 0.004,0.08316 0.0106,0.12345 7.5e-4,0.0053 10e-4,0.01041 0.002,0.01567 0.001,0.0073 0.002,0.01466 0.004,0.02186 0.10284,0.693169 0.73757,1.119278 2.19888,2.190555 2.64127,1.936306 2.45943,1.935512 5.11716,0.02186 1.68877,-1.215962 2.28048,-1.590346 2.23197,-2.501308 v -7.790874 c 0,-0.431505 -0.34751,-0.778817 -0.77902,-0.778817 z"
|
||||||
|
style="opacity:1;fill:#363636;fill-opacity:1;stroke:none;stroke-width:0.31555739;stroke-miterlimit:1.41420996;stroke-dasharray:none;stroke-opacity:1;paint-order:markers stroke fill" />
|
||||||
|
<g
|
||||||
|
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996"
|
||||||
|
id="g1353"
|
||||||
|
transform="matrix(0.02054188,0,0,0.02054188,97.15326,-61.563495)">
|
||||||
|
<g
|
||||||
|
id="g1327"
|
||||||
|
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||||
|
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||||
|
<path
|
||||||
|
id="path1325"
|
||||||
|
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||||
|
d="m 364.467,-333.746 c 0.171,-1.908 1.646,-3.118 3.899,-3.118 2.256,0 3.73,1.21 3.901,3.118 z m 7.569,4.711 c -0.577,1.414 -1.937,2.251 -3.784,2.251 -2.313,0 -3.87,-1.444 -3.933,-3.725 h 13.297 c 0,-0.237 0,-0.435 0,-0.671 0,-5.714 -3.354,-8.925 -9.364,-8.925 -5.836,0 -9.365,3.241 -9.365,8.324 0,5.114 3.584,8.35 9.365,8.35 3.469,0 6.159,-1.189 7.817,-3.279 z"/>
|
||||||
|
</g>
|
||||||
|
<g
|
||||||
|
id="g1331"
|
||||||
|
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||||
|
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||||
|
<path
|
||||||
|
id="path1329"
|
||||||
|
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||||
|
d="m 305.468,-333.737 c 0.176,-1.908 1.651,-3.118 3.906,-3.118 2.252,0 3.726,1.21 3.899,3.118 z m 7.574,4.711 c -0.578,1.418 -1.937,2.255 -3.788,2.255 -2.309,0 -3.87,-1.448 -3.931,-3.73 h 13.294 c 0,-0.234 0,-0.431 0,-0.667 0,-5.717 -3.353,-8.929 -9.363,-8.929 -5.839,0 -9.361,3.242 -9.361,8.325 0,5.114 3.582,8.35 9.361,8.35 3.468,0 6.16,-1.185 7.821,-3.278 z"/>
|
||||||
|
</g>
|
||||||
|
<g
|
||||||
|
id="g1335"
|
||||||
|
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||||
|
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||||
|
<rect
|
||||||
|
id="rect1333"
|
||||||
|
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||||
|
height="19.617001"
|
||||||
|
width="4.7950001"
|
||||||
|
y="-343.56"
|
||||||
|
x="293.90701" />
|
||||||
|
</g>
|
||||||
|
<g
|
||||||
|
id="g1339"
|
||||||
|
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||||
|
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||||
|
<path
|
||||||
|
id="path1337"
|
||||||
|
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||||
|
d="m 319.81,-338.348 h 4.822 v 1.168 c 1.707,-1.822 3.757,-2.743 6.069,-2.743 2.663,0 4.679,0.921 5.72,2.489 0.869,1.295 0.926,2.858 0.926,4.912 v 8.579 h -4.829 v -7.538 c 0,-3.128 -0.629,-4.572 -3.375,-4.572 -2.775,0 -4.511,1.653 -4.511,4.428 v 7.682 h -4.822 z"/>
|
||||||
|
</g>
|
||||||
|
<g
|
||||||
|
id="g1343"
|
||||||
|
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||||
|
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||||
|
<path
|
||||||
|
id="path1341"
|
||||||
|
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||||
|
d="m 352.876,-331.538 c 0,2.685 -1.794,4.446 -4.57,4.446 -2.778,0 -4.572,-1.701 -4.572,-4.415 0,-2.754 1.77,-4.454 4.572,-4.454 2.776,0 4.57,1.73 4.57,4.423 z m 0,-6.157 c -1.219,-1.307 -2.983,-2.024 -5.435,-2.024 -5.29,0 -8.902,3.262 -8.902,8.151 0,4.793 3.587,8.146 8.815,8.146 2.397,0 4.157,-0.606 5.522,-1.965 v 1.444 h 4.825 v -20.861 l -4.825,1.244 z"/>
|
||||||
|
</g>
|
||||||
|
<g
|
||||||
|
id="g1347"
|
||||||
|
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||||
|
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||||
|
<path
|
||||||
|
id="path1345"
|
||||||
|
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||||
|
d="m 282.947,-335.961 c 2.804,0 4.567,1.7 4.567,4.454 0,2.714 -1.791,4.415 -4.567,4.415 -2.774,0 -4.566,-1.761 -4.566,-4.446 0,-2.693 1.792,-4.423 4.566,-4.423 z m -4.566,-7.599 -4.827,-1.244 v 20.861 h 4.827 v -1.444 c 1.358,1.359 3.121,1.965 5.52,1.965 5.231,0 8.813,-3.353 8.813,-8.146 0,-4.889 -3.613,-8.151 -8.9,-8.151 -2.457,0 -4.22,0.717 -5.433,2.024 z"/>
|
||||||
|
</g>
|
||||||
|
<g
|
||||||
|
id="g1351"
|
||||||
|
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||||
|
style="clip-rule:evenodd;fill:#d8d8d8;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996">
|
||||||
|
<path
|
||||||
|
id="path1349"
|
||||||
|
style="fill:#d8d8d8;fill-opacity:1;fill-rule:nonzero"
|
||||||
|
d="m 378.806,-323.943 v -14.405 h 4.825 v 0.89 c 1.445,-1.74 2.974,-2.606 4.713,-2.606 0.345,0 0.779,0.056 1.356,0.113 v 4.107 c -0.465,-0.061 -0.983,-0.061 -1.533,-0.061 -2.805,0 -4.536,1.85 -4.536,4.996 v 6.966 z"/>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
<g
|
||||||
|
transform="matrix(0.04039667,0,0,0.04039667,81.604348,-55.892386)"
|
||||||
|
style="clip-rule:evenodd;fill:#ffffff;fill-opacity:1;fill-rule:evenodd;stroke-linejoin:round;stroke-miterlimit:1.41420996"
|
||||||
|
id="g1367">
|
||||||
|
<g
|
||||||
|
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||||
|
id="g1361"
|
||||||
|
style="fill:#ffffff;fill-opacity:1">
|
||||||
|
<path
|
||||||
|
d="m 243.13,-333.715 c 0.106,-1.891 1.032,-3.557 2.429,-4.738 1.37,-1.16 3.214,-1.869 5.226,-1.869 2.01,0 3.854,0.709 5.225,1.869 1.396,1.181 2.322,2.847 2.429,4.736 0.106,1.943 -0.675,3.748 -2.045,5.086 -1.397,1.361 -3.384,2.215 -5.609,2.215 -2.225,0 -4.216,-0.854 -5.612,-2.215 -1.371,-1.338 -2.15,-3.143 -2.043,-5.084 z"
|
||||||
|
style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero"
|
||||||
|
id="path1359" />
|
||||||
|
</g>
|
||||||
|
<g
|
||||||
|
transform="matrix(3.3451117,0,0,3.3451075,277.7359,1100.2048)"
|
||||||
|
id="g1365"
|
||||||
|
style="fill:#ffffff;fill-opacity:1">
|
||||||
|
<path
|
||||||
|
d="m 230.94,-329.894 c 0.013,0.74 0.249,2.178 0.603,3.301 0.744,2.377 2.006,4.576 3.762,6.514 1.802,1.992 4.021,3.592 6.584,4.728 2.694,1.193 5.613,1.801 8.645,1.796 3.027,-0.004 5.946,-0.624 8.64,-1.826 2.563,-1.147 4.78,-2.754 6.579,-4.747 1.755,-1.946 3.015,-4.149 3.761,-6.526 0.375,-1.201 0.612,-2.42 0.707,-3.643 0.093,-1.205 0.054,-2.412 -0.117,-3.618 -0.334,-2.35 -1.147,-4.555 -2.399,-6.565 -1.145,-1.847 -2.621,-3.464 -4.376,-4.825 l 0.004,-0.003 -17.711,-13.599 c -0.016,-0.012 -0.029,-0.025 -0.046,-0.036 -1.162,-0.892 -3.116,-0.889 -4.394,0.005 -1.292,0.904 -1.44,2.399 -0.29,3.342 l -0.005,0.005 7.387,6.007 -22.515,0.024 c -0.011,0 -0.022,0 -0.03,0 -1.861,0.002 -3.65,1.223 -4.004,2.766 -0.364,1.572 0.9,2.876 2.835,2.883 l -0.003,0.007 11.412,-0.022 -20.364,15.631 c -0.026,0.019 -0.054,0.039 -0.078,0.058 -1.921,1.471 -2.542,3.917 -1.332,5.465 1.228,1.574 3.839,1.577 5.78,0.009 l 11.114,-9.096 c 0,0 -0.162,1.228 -0.149,1.965 z m 28.559,4.112 c -2.29,2.333 -5.496,3.656 -8.965,3.663 -3.474,0.006 -6.68,-1.305 -8.97,-3.634 -1.119,-1.135 -1.941,-2.441 -2.448,-3.832 -0.497,-1.367 -0.69,-2.818 -0.562,-4.282 0.121,-1.431 0.547,-2.796 1.227,-4.031 0.668,-1.214 1.588,-2.311 2.724,-3.239 2.226,-1.814 5.06,-2.796 8.024,-2.8 2.967,-0.004 5.799,0.969 8.027,2.777 1.134,0.924 2.053,2.017 2.721,3.229 0.683,1.234 1.106,2.594 1.232,4.029 0.126,1.462 -0.067,2.911 -0.564,4.279 -0.508,1.395 -1.327,2.701 -2.446,3.841 z"
|
||||||
|
style="fill:#ffffff;fill-opacity:1;fill-rule:nonzero"
|
||||||
|
id="path1363" />
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 10 KiB |
@@ -52,6 +52,7 @@ import {
|
|||||||
writeMainConfig,
|
writeMainConfig,
|
||||||
writeTraefikConfigInPath,
|
writeTraefikConfigInPath,
|
||||||
} from "@dokploy/server";
|
} from "@dokploy/server";
|
||||||
|
import { checkGPUStatus, setupGPUSupport } from "@dokploy/server";
|
||||||
import { generateOpenApiDocument } from "@dokploy/trpc-openapi";
|
import { generateOpenApiDocument } from "@dokploy/trpc-openapi";
|
||||||
import { TRPCError } from "@trpc/server";
|
import { TRPCError } from "@trpc/server";
|
||||||
import { sql } from "drizzle-orm";
|
import { sql } from "drizzle-orm";
|
||||||
@@ -657,6 +658,54 @@ export const settingsRouter = createTRPCRouter({
|
|||||||
}
|
}
|
||||||
return { status: "not_cloud" };
|
return { status: "not_cloud" };
|
||||||
}),
|
}),
|
||||||
|
// Admin-only mutation that runs the GPU setup for the given server.
// On cloud installations a serverId is mandatory; the missing-id case is
// reported as a BAD_REQUEST TRPCError so the client sees an input error
// rather than an internal server error.
setupGPU: adminProcedure
	.input(
		z.object({
			serverId: z.string().optional(),
		}),
	)
	.mutation(async ({ input }) => {
		if (IS_CLOUD && !input.serverId) {
			throw new TRPCError({
				code: "BAD_REQUEST",
				message: "Select a server to enable the GPU Setup",
			});
		}

		try {
			await setupGPUSupport(input.serverId);
			return { success: true };
		} catch (error) {
			// Log server-side, then rethrow unchanged so the client still
			// receives the original error message.
			console.error("GPU Setup Error:", error);
			throw error;
		}
	}),
|
||||||
|
// Admin-only query returning the GPU status of the given server.
// On cloud installations without a serverId nothing is probed and an
// "everything unavailable" placeholder is returned instead.
checkGPUStatus: adminProcedure
	.input(
		z.object({
			serverId: z.string().optional(),
		}),
	)
	.query(async ({ input }) => {
		if (IS_CLOUD && !input.serverId) {
			return {
				driverInstalled: false,
				driverVersion: undefined,
				gpuModel: undefined,
				runtimeInstalled: false,
				runtimeConfigured: false,
				// Boolean like every other flag here, matching the shape the
				// real status check returns.
				cudaSupport: false,
				cudaVersion: undefined,
				memoryInfo: undefined,
				availableGPUs: 0,
				swarmEnabled: false,
				gpuResources: 0,
			};
		}

		try {
			return await checkGPUStatus(input.serverId ?? "");
		} catch (error) {
			// Keep the generic client-facing message, but log the underlying
			// failure and attach it as the cause instead of discarding it.
			console.error("GPU Status Error:", error);
			throw new TRPCError({
				code: "INTERNAL_SERVER_ERROR",
				message: "Failed to check GPU status",
				cause: error,
			});
		}
	}),
|
||||||
});
|
});
|
||||||
// {
|
// {
|
||||||
// "Parallelism": 1,
|
// "Parallelism": 1,
|
||||||
|
|||||||
@@ -21,7 +21,6 @@ import {
|
|||||||
import type { Session, User } from "lucia";
|
import type { Session, User } from "lucia";
|
||||||
import superjson from "superjson";
|
import superjson from "superjson";
|
||||||
import { ZodError } from "zod";
|
import { ZodError } from "zod";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 1. CONTEXT
|
* 1. CONTEXT
|
||||||
*
|
*
|
||||||
|
|||||||
26
apps/dokploy/templates/blender/docker-compose.yml
Normal file
26
apps/dokploy/templates/blender/docker-compose.yml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
version: "3.8"
|
||||||
|
|
||||||
|
services:
|
||||||
|
blender:
|
||||||
|
image: lscr.io/linuxserver/blender:latest
|
||||||
|
runtime: nvidia
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
count: all
|
||||||
|
capabilities:
|
||||||
|
- gpu
|
||||||
|
environment:
|
||||||
|
- NVIDIA_VISIBLE_DEVICES=all
|
||||||
|
- NVIDIA_DRIVER_CAPABILITIES=all
|
||||||
|
- PUID=1000
|
||||||
|
- PGID=1000
|
||||||
|
- TZ=Etc/UTC
|
||||||
|
- SUBFOLDER=/ #optional
|
||||||
|
ports:
|
||||||
|
- 3000
|
||||||
|
- 3001
|
||||||
|
restart: unless-stopped
|
||||||
|
shm_size: 1gb
|
||||||
34
apps/dokploy/templates/blender/index.ts
Normal file
34
apps/dokploy/templates/blender/index.ts
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
import {
|
||||||
|
type DomainSchema,
|
||||||
|
type Schema,
|
||||||
|
type Template,
|
||||||
|
generateHash,
|
||||||
|
generateRandomDomain,
|
||||||
|
} from "../utils";
|
||||||
|
|
||||||
|
/**
 * Builds the Blender template: one generated domain routed to port 3000 of
 * the `blender` service plus the container's default environment variables.
 *
 * @param schema - Template schema used to generate the random domain.
 * @returns The environment variables and domain mappings for the template.
 */
export function generate(schema: Schema): Template {
	const mainDomain = generateRandomDomain(schema);

	const domains: DomainSchema[] = [
		{
			host: mainDomain,
			port: 3000,
			serviceName: "blender",
		},
	];

	const envs = [
		"PUID=1000",
		"PGID=1000",
		"TZ=Etc/UTC",
		"SUBFOLDER=/",
		"NVIDIA_VISIBLE_DEVICES=all",
		"NVIDIA_DRIVER_CAPABILITIES=all",
	];

	return {
		envs,
		domains,
	};
}
|
||||||
@@ -716,5 +716,20 @@ export const templates: TemplateData[] = [
|
|||||||
},
|
},
|
||||||
tags: ["file", "sync"],
|
tags: ["file", "sync"],
|
||||||
load: () => import("./nextcloud-aio/index").then((m) => m.generate),
|
load: () => import("./nextcloud-aio/index").then((m) => m.generate),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
id: "blender",
|
||||||
|
name: "Blender",
|
||||||
|
version: "latest",
|
||||||
|
description:
|
||||||
|
"Blender is a free and open-source 3D creation suite. It supports the entire 3D pipeline—modeling, rigging, animation, simulation, rendering, compositing and motion tracking, video editing and 2D animation pipeline.",
|
||||||
|
logo: "blender.svg",
|
||||||
|
links: {
|
||||||
|
github: "https://github.com/linuxserver/docker-blender",
|
||||||
|
website: "https://www.blender.org/",
|
||||||
|
docs: "https://docs.blender.org/",
|
||||||
|
},
|
||||||
|
tags: ["3d", "rendering", "animation"],
|
||||||
|
load: () => import("./blender/index").then((m) => m.generate),
|
||||||
},
|
},
|
||||||
];
|
];
|
||||||
|
|||||||
@@ -116,3 +116,4 @@ export * from "./monitoring/utilts";
|
|||||||
|
|
||||||
export * from "./db/validations/domain";
|
export * from "./db/validations/domain";
|
||||||
export * from "./db/validations/index";
|
export * from "./db/validations/index";
|
||||||
|
export * from "./utils/gpu-setup";
|
||||||
|
|||||||
349
packages/server/src/utils/gpu-setup.ts
Normal file
349
packages/server/src/utils/gpu-setup.ts
Normal file
@@ -0,0 +1,349 @@
|
|||||||
|
import * as fs from "node:fs/promises";
|
||||||
|
import { execAsync, sleep } from "../utils/process/execAsync";
|
||||||
|
import { execAsyncRemote } from "../utils/process/execAsync";
|
||||||
|
|
||||||
|
/** Combined result of the GPU checks performed by `checkGPUStatus`. */
interface GPUInfo {
	driverInstalled: boolean;
	driverVersion?: string;
	gpuModel?: string;
	runtimeInstalled: boolean;
	runtimeConfigured: boolean;
	cudaSupport: boolean;
	cudaVersion?: string;
	memoryInfo?: string;
	availableGPUs: number;
	swarmEnabled: boolean;
	gpuResources: number;
}

/**
 * Collects the GPU status for a server.
 *
 * The five checks are started together and awaited as a group; their results
 * are merged in a fixed order (driver, runtime, swarm, GPU info, CUDA), so a
 * later check wins if two of them report the same key. If the group rejects,
 * the error is logged and an "everything unavailable" result is returned
 * instead of throwing.
 *
 * @param serverId - Forwarded unchanged to every individual check.
 * @returns The merged status, or the all-unavailable fallback on failure.
 */
export async function checkGPUStatus(serverId?: string): Promise<GPUInfo> {
	try {
		const [driver, runtime, swarm, gpu, cuda] = await Promise.all([
			checkGpuDriver(serverId),
			checkRuntime(serverId),
			checkSwarmResources(serverId),
			checkGpuInfo(serverId),
			checkCudaSupport(serverId),
		]);

		return { ...driver, ...runtime, ...swarm, ...gpu, ...cuda };
	} catch (err) {
		console.error("Error in checkGPUStatus:", err);

		// Fresh object on every failure so callers never share a fallback.
		const unavailable: GPUInfo = {
			driverInstalled: false,
			driverVersion: undefined,
			runtimeInstalled: false,
			runtimeConfigured: false,
			cudaSupport: false,
			cudaVersion: undefined,
			gpuModel: undefined,
			memoryInfo: undefined,
			availableGPUs: 0,
			swarmEnabled: false,
			gpuResources: 0,
		};
		return unavailable;
	}
}
|
||||||
|
|
||||||
|
/**
 * Detects the NVIDIA driver and counts the GPUs visible to `nvidia-smi`.
 *
 * `nvidia-smi --query-gpu=... --format=csv,noheader` prints one row per GPU,
 * so the driver version is read from the first row only (previously a
 * multi-GPU host produced several versions joined by newlines) and the GPU
 * count is the number of rows, which avoids a second command and a possible
 * `NaN` from parsing its output.
 *
 * @param serverId - Remote server to run the query on; runs locally when omitted.
 * @returns `driverVersion` (undefined when nothing was reported),
 *   `driverInstalled` and `availableGPUs`. Failures are logged at debug level
 *   and reported as "not installed" rather than thrown.
 */
const checkGpuDriver = async (serverId?: string) => {
	let driverVersion: string | undefined;
	let driverInstalled = false;
	let availableGPUs = 0;

	try {
		const driverCommand =
			"nvidia-smi --query-gpu=driver_version --format=csv,noheader";
		const { stdout: nvidiaSmi } = serverId
			? await execAsyncRemote(serverId, driverCommand)
			: await execAsync(driverCommand);

		// One non-empty row per GPU.
		const rows = nvidiaSmi
			.split("\n")
			.map((row: string) => row.trim())
			.filter((row: string) => row.length > 0);

		const firstRow = rows[0];
		if (firstRow) {
			driverVersion = firstRow;
			driverInstalled = true;
			availableGPUs = rows.length;
		}
	} catch (error) {
		console.debug("GPU driver check:", error);
	}

	return { driverVersion, driverInstalled, availableGPUs };
};
|
||||||
|
|
||||||
|
/**
 * Probes the NVIDIA container runtime in two independent steps:
 * whether the `nvidia-container-runtime` binary resolves, and whether Docker
 * both lists an `nvidia` runtime and uses it as the default runtime.
 *
 * @param serverId - Remote server to probe; runs locally when omitted.
 * @returns `runtimeInstalled` and `runtimeConfigured`; each step logs its own
 *   failure at debug level and leaves its flag `false`.
 */
const checkRuntime = async (serverId?: string) => {
	let runtimeInstalled = false;
	let runtimeConfigured = false;

	// Single place that decides between remote and local execution.
	const run = (command: string) =>
		serverId ? execAsyncRemote(serverId, command) : execAsync(command);

	try {
		// Step 1: does the runtime binary resolve?
		try {
			const binaryLookup = await run("command -v nvidia-container-runtime");
			runtimeInstalled = !!binaryLookup.stdout.trim();
		} catch (error) {
			console.debug("Runtime binary check:", error);
		}

		// Step 2: is it registered with Docker and selected as the default?
		try {
			const runtimesResult = await run(
				'docker info --format "{{json .Runtimes}}"',
			);
			const defaultResult = await run(
				'docker info --format "{{.DefaultRuntime}}"',
			);

			const registeredRuntimes = JSON.parse(runtimesResult.stdout);
			const nvidiaRegistered = "nvidia" in registeredRuntimes;
			const nvidiaIsDefault = defaultResult.stdout.trim() === "nvidia";

			// Configured only when registered AND default.
			runtimeConfigured = nvidiaRegistered && nvidiaIsDefault;
		} catch (error) {
			console.debug("Runtime configuration check:", error);
		}
	} catch (error) {
		console.debug("Runtime check:", error);
	}

	return { runtimeInstalled, runtimeConfigured };
};
|
||||||
|
|
||||||
|
/**
 * Checks whether the current swarm node advertises GPUs as a discrete
 * generic resource.
 *
 * The command output is trimmed before being compared with `"null"` and the
 * parsed value is only iterated when it is an array. Previously the trailing
 * newline let `"null\n"` through, `JSON.parse` returned `null`, and iterating
 * it threw a `TypeError` that was swallowed and logged as a probe failure.
 *
 * @param serverId - Remote server to inspect; runs locally when omitted.
 * @returns `swarmEnabled` and `gpuResources` (the `Value` of the first
 *   discrete resource whose kind is `GPU` or `gpu`). Command or parse
 *   failures are logged at debug level and reported as "not enabled".
 */
const checkSwarmResources = async (serverId?: string) => {
	let swarmEnabled = false;
	let gpuResources = 0;

	try {
		const nodeCommand =
			"docker node inspect self --format '{{json .Description.Resources.GenericResources}}'";
		const { stdout: resources } = serverId
			? await execAsyncRemote(serverId, nodeCommand)
			: await execAsync(nodeCommand);

		const payload = typeof resources === "string" ? resources.trim() : "";

		if (payload && payload !== "null") {
			const genericResources: unknown = JSON.parse(payload);

			if (Array.isArray(genericResources)) {
				for (const resource of genericResources) {
					const spec = resource?.DiscreteResourceSpec;
					if (spec && (spec.Kind === "GPU" || spec.Kind === "gpu")) {
						gpuResources = spec.Value;
						swarmEnabled = true;
						break;
					}
				}
			}
		}
	} catch (error) {
		console.debug("Swarm resource check:", error);
	}

	return { swarmEnabled, gpuResources };
};
|
||||||
|
|
||||||
|
/**
 * Reads the GPU model name and total memory from `nvidia-smi`.
 *
 * The query prints one `name, memory` row per GPU. Only the first non-empty
 * row is parsed; splitting the whole output on commas (as before) mixed rows
 * together on multi-GPU hosts, e.g. `memoryInfo` became
 * `"15360 MiB\nTesla T4"`.
 *
 * @param serverId - Remote server to query; runs locally when omitted.
 * @returns `gpuModel` and `memoryInfo` of the first GPU, both undefined when
 *   nothing was reported. Failures are logged at debug level, not thrown.
 */
const checkGpuInfo = async (serverId?: string) => {
	let gpuModel: string | undefined;
	let memoryInfo: string | undefined;

	try {
		const gpuInfoCommand =
			"nvidia-smi --query-gpu=gpu_name,memory.total --format=csv,noheader";
		const { stdout: gpuInfo } = serverId
			? await execAsyncRemote(serverId, gpuInfoCommand)
			: await execAsync(gpuInfoCommand);

		const firstRow = gpuInfo
			.split("\n")
			.map((row: string) => row.trim())
			.find((row: string) => row.length > 0);

		if (firstRow) {
			[gpuModel, memoryInfo] = firstRow
				.split(",")
				.map((field: string) => field.trim());
		}
	} catch (error) {
		console.debug("GPU info check:", error);
	}

	return { gpuModel, memoryInfo };
};
|
||||||
|
|
||||||
|
/**
 * Extracts the CUDA version from the "CUDA Version" line of `nvidia-smi -q`.
 *
 * @param serverId - Remote server to query; runs locally when omitted.
 * @returns `cudaVersion` (undefined when no version could be parsed) and
 *   `cudaSupport`, which is true exactly when a version was parsed. Failures
 *   are logged at debug level, not thrown.
 */
const checkCudaSupport = async (serverId?: string) => {
	let cudaVersion: string | undefined;
	let cudaSupport = false;

	try {
		const cudaCommand = 'nvidia-smi -q | grep "CUDA Version"';
		const queryResult = serverId
			? await execAsyncRemote(serverId, cudaCommand)
			: await execAsync(cudaCommand);

		const versionMatch = queryResult.stdout.match(
			/CUDA Version\s*:\s*([\d\.]+)/,
		);
		if (versionMatch) {
			cudaVersion = versionMatch[1];
		} else {
			cudaVersion = undefined;
		}
		cudaSupport = Boolean(cudaVersion);
	} catch (error) {
		console.debug("CUDA support check:", error);
	}

	return { cudaVersion, cudaSupport };
};
|
||||||
|
|
||||||
|
/**
 * Configures Docker on a host to expose its NVIDIA GPUs to swarm services.
 *
 * Sequence: check current status, validate prerequisites (returns early when
 * nothing needs to be done), resolve the swarm node id, build the daemon
 * configuration, apply it remotely or locally, wait for Docker to come back,
 * label the node, wait again, then verify.
 *
 * @param serverId - Remote server to configure; configures the local host
 *   when omitted.
 * @throws Error with a sudo-specific message when the underlying failure
 *   mentions "password is required"; any other error is rethrown unchanged.
 */
export async function setupGPUSupport(serverId?: string): Promise<void> {
	try {
		// 1. Current state; bail out when nothing needs configuring
		const statusBefore = await checkGPUStatus(serverId);
		const setupNeeded = await validatePrerequisites(statusBefore);
		if (!setupNeeded) {
			return;
		}

		// 2. Swarm node this host is registered as
		const swarmNodeId = await getNodeId(serverId);

		// 3. Docker daemon configuration advertising the detected GPUs
		const config = createDaemonConfig(statusBefore.availableGPUs);

		// 4. Apply it on the right machine
		await (serverId
			? setupRemoteServer(serverId, config)
			: setupLocalServer(config));

		// 5. Give Docker time to restart
		await sleep(10000);

		// 6. Label the node
		await addGpuLabel(swarmNodeId, serverId);

		// 7. Let the change settle, then verify
		await sleep(5000);
		await verifySetup(swarmNodeId, serverId);
	} catch (error) {
		const sudoPasswordNeeded =
			error instanceof Error && error.message.includes("password is required");
		if (!sudoPasswordNeeded) {
			throw error;
		}
		throw new Error(
			"Sudo access required. Please run with appropriate permissions.",
		);
	}
}
|
||||||
|
|
||||||
|
/**
 * Decides whether GPU setup should proceed for the given status.
 *
 * @param initialStatus - Status gathered before any change is made.
 * @returns `false` when the swarm GPU resource and the Docker runtime are
 *   both already configured (nothing to do), `true` otherwise.
 * @throws Error when the NVIDIA driver is missing, or (checked second) when
 *   the NVIDIA container runtime is missing.
 */
const validatePrerequisites = async (initialStatus: GPUInfo) => {
	const missingPrerequisite = !initialStatus.driverInstalled
		? "NVIDIA drivers not installed. Please install appropriate NVIDIA drivers first."
		: !initialStatus.runtimeInstalled
			? "NVIDIA Container Runtime not installed. Please install nvidia-container-runtime first."
			: undefined;

	if (missingPrerequisite !== undefined) {
		throw new Error(missingPrerequisite);
	}

	const alreadyConfigured =
		initialStatus.swarmEnabled && initialStatus.runtimeConfigured;
	return !alreadyConfigured;
};
|
||||||
|
|
||||||
|
/**
 * Resolves the swarm node id that Docker reports for the target host.
 *
 * @param serverId - Remote server to query; runs locally when omitted.
 * @returns The trimmed node id.
 * @throws Error when Docker reports an empty node id.
 */
const getNodeId = async (serverId?: string) => {
	const nodeIdCommand = 'docker info --format "{{.Swarm.NodeID}}"';

	const infoResult = serverId
		? await execAsyncRemote(serverId, nodeIdCommand)
		: await execAsync(nodeIdCommand);

	const swarmNodeId = infoResult.stdout.trim();
	if (swarmNodeId) {
		return swarmNodeId;
	}

	throw new Error("Setup Server before enabling GPU support");
};
|
||||||
|
|
||||||
|
/**
 * Builds the Docker daemon configuration object that registers the NVIDIA
 * runtime, makes it the default runtime, and advertises the GPUs as a
 * `GPU=<count>` node generic resource.
 *
 * @param availableGPUs - Number of GPUs to advertise.
 * @returns A plain object intended to be serialized as JSON.
 */
const createDaemonConfig = (availableGPUs: number) => {
	const nvidiaRuntime = {
		path: "nvidia-container-runtime",
		runtimeArgs: [],
	};
	const advertisedResources = [`GPU=${availableGPUs}`];

	// Key order is kept stable so the serialized JSON does not change.
	return {
		runtimes: { nvidia: nvidiaRuntime },
		"default-runtime": "nvidia",
		"node-generic-resources": advertisedResources,
	};
};
|
||||||
|
|
||||||
|
/**
 * Applies the GPU daemon configuration on a remote server and restarts Docker.
 *
 * All steps run as one `&&` chain so the first failure stops the sequence.
 * NOTE: `/etc/docker/daemon.json` is replaced wholesale; existing settings in
 * that file are not merged.
 *
 * Changes from the previous version:
 * - `config.toml` is created with `touch` before `sed -i` edits it. `mkdir -p`
 *   only creates the directory, and `sed -i` on a missing file fails, which
 *   aborted the chain after `daemon.json` had already been overwritten.
 * - Single quotes in the serialized config are escaped, because the JSON is
 *   embedded in a single-quoted shell string.
 *
 * @param serverId - Server to run the commands on.
 * @param daemonConfig - JSON-serializable Docker daemon configuration.
 * @throws Whatever `execAsyncRemote` rejects with (e.g. when `sudo -n` needs
 *   a password or a step fails).
 */
const setupRemoteServer = async (serverId: string, daemonConfig: unknown) => {
	const runtimeConfigPath = "/etc/nvidia-container-runtime/config.toml";
	const daemonJson = JSON.stringify(daemonConfig, null, 2).replace(
		/'/g,
		"'\\''",
	);

	const setupCommands = [
		// Fail fast when sudo would prompt for a password.
		"sudo -n true",
		`echo '${daemonJson}' | sudo tee /etc/docker/daemon.json`,
		"sudo mkdir -p /etc/nvidia-container-runtime",
		// sed -i needs an existing file.
		`sudo touch ${runtimeConfigPath}`,
		// Drop any previous swarm-resource line before appending the new one.
		`sudo sed -i "/swarm-resource/d" ${runtimeConfigPath}`,
		`echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" | sudo tee -a ${runtimeConfigPath}`,
		"sudo systemctl daemon-reload",
		"sudo systemctl restart docker",
	].join(" && ");

	await execAsyncRemote(serverId, setupCommands);
};
|
||||||
|
|
||||||
|
/**
 * Applies the GPU daemon configuration on the local host and restarts Docker.
 *
 * The configuration is written to a temporary file, then copied into place by
 * a privileged (`pkexec`) shell script.
 * NOTE: `/etc/docker/daemon.json` is replaced wholesale; existing settings in
 * that file are not merged.
 *
 * Changes from the previous version:
 * - The temporary file is removed in a `finally` block. It used to be removed
 *   by a chained `rm` that only ran when the privileged script succeeded, so
 *   it leaked on every failure.
 * - `config.toml` is created with `touch` before `sed -i` edits it, because
 *   `sed -i` fails on a missing file.
 *
 * @param daemonConfig - JSON-serializable Docker daemon configuration.
 * @throws Whatever `fs.writeFile` or `execAsync` rejects with.
 */
const setupLocalServer = async (daemonConfig: unknown) => {
	const configFile = `/tmp/docker-daemon-${Date.now()}.json`;
	await fs.writeFile(configFile, JSON.stringify(daemonConfig, null, 2));

	const setupCommand = `pkexec sh -c '
		cp ${configFile} /etc/docker/daemon.json &&
		mkdir -p /etc/nvidia-container-runtime &&
		touch /etc/nvidia-container-runtime/config.toml &&
		sed -i "/swarm-resource/d" /etc/nvidia-container-runtime/config.toml &&
		echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" >> /etc/nvidia-container-runtime/config.toml &&
		systemctl daemon-reload &&
		systemctl restart docker
	'`;

	try {
		await execAsync(setupCommand);
	} finally {
		// Best-effort cleanup; never let it mask the real setup error.
		try {
			await fs.rm(configFile, { force: true });
		} catch (cleanupError) {
			console.debug("Temporary daemon config cleanup:", cleanupError);
		}
	}
};
|
||||||
|
|
||||||
|
/**
 * Adds the `gpu=true` label to a swarm node.
 *
 * @param nodeId - Swarm node to label.
 * @param serverId - Remote server to run the command on; runs locally when
 *   omitted.
 */
const addGpuLabel = async (nodeId: string, serverId?: string) => {
	const labelCommand = "docker node update --label-add gpu=true " + nodeId;

	if (!serverId) {
		await execAsync(labelCommand);
		return;
	}

	await execAsyncRemote(serverId, labelCommand);
};
|
||||||
|
|
||||||
|
/**
 * Re-checks the GPU status after setup and fails when the swarm still does
 * not advertise a GPU resource.
 *
 * On failure, diagnostic output is collected and logged before throwing.
 * Diagnostics are best-effort: the commands are separated with `;` so one
 * failing command does not suppress the others, and a failure to collect
 * them is logged rather than propagated. Previously they were chained with
 * `&&` and not caught, so a missing file or an empty `grep` replaced the
 * intended error with an unrelated command failure.
 *
 * @param nodeId - Swarm node to include in the diagnostics.
 * @param serverId - Remote server to verify; verifies locally when omitted.
 * @returns The final status when the swarm GPU resource is detected.
 * @throws Error "GPU support not detected in swarm after setup" otherwise.
 */
const verifySetup = async (nodeId: string, serverId?: string) => {
	const finalStatus = await checkGPUStatus(serverId);

	if (finalStatus.swarmEnabled) {
		return finalStatus;
	}

	const diagnosticCommands = [
		`docker node inspect ${nodeId}`,
		'nvidia-smi -a | grep "GPU UUID"',
		"cat /etc/docker/daemon.json",
		"cat /etc/nvidia-container-runtime/config.toml",
	].join("; ");

	try {
		const { stdout: diagnostics } = serverId
			? await execAsyncRemote(serverId, diagnosticCommands)
			: await execAsync(diagnosticCommands);

		console.error("Diagnostic Information:", diagnostics);
	} catch (diagnosticError) {
		console.error("Failed to collect GPU diagnostics:", diagnosticError);
	}

	throw new Error("GPU support not detected in swarm after setup");
};
|
||||||
Reference in New Issue
Block a user