refactor: GPU support and Docker setup improvements

- Add GPU status refresh with useEffect
- Update docker-compose.yml configuration
- Modify GPU setup scripts
- Improve GPU support checks
This commit is contained in:
vishalkadam47
2024-11-11 23:18:24 +05:30
parent 66c4d8f118
commit b817b4b6ee
3 changed files with 43 additions and 15 deletions

View File

@@ -11,7 +11,7 @@ import {
import { api } from "@/utils/api";
import { TRPCClientError } from "@trpc/client";
import { CheckCircle2, Cpu, Loader2, RefreshCw, XCircle } from "lucide-react";
import { useState } from "react";
import { useEffect, useState } from "react";
import { toast } from "sonner";
interface GPUSupportProps {
@@ -54,9 +54,18 @@ export function GPUSupport({ serverId }: GPUSupportProps) {
const handleRefresh = async () => {
setIsRefreshing(true);
await refetch();
setIsRefreshing(false);
try {
await utils.settings.checkGPUStatus.invalidate({ serverId });
await refetch();
} catch (error) {
toast.error("Failed to refresh GPU status");
} finally {
setIsRefreshing(false);
}
};
useEffect(() => {
handleRefresh();
}, []);
const handleEnableGPU = async () => {
if (serverId === undefined) {

View File

@@ -3,7 +3,6 @@ version: "3.8"
services:
blender:
image: lscr.io/linuxserver/blender:latest
container_name: blender
runtime: nvidia
deploy:
resources:

View File

@@ -87,20 +87,38 @@ const checkRuntime = async (serverId?: string) => {
let runtimeConfigured = false;
try {
const runtimeCommand = 'docker info --format "{{json .Runtimes}}"';
const { stdout: runtimeInfo } = serverId
? await execAsyncRemote(serverId, runtimeCommand)
: await execAsync(runtimeCommand);
// First check: Is nvidia-container-runtime installed?
const checkBinaryCommand = "command -v nvidia-container-runtime";
try {
const { stdout } = serverId
? await execAsyncRemote(serverId, checkBinaryCommand)
: await execAsync(checkBinaryCommand);
runtimeInstalled = !!stdout.trim();
} catch (error) {
console.debug("Runtime binary check:", error);
}
const runtimes = JSON.parse(runtimeInfo);
runtimeInstalled = "nvidia" in runtimes;
// Second check: Is it configured in Docker?
try {
const runtimeCommand = 'docker info --format "{{json .Runtimes}}"';
const { stdout: runtimeInfo } = serverId
? await execAsyncRemote(serverId, runtimeCommand)
: await execAsync(runtimeCommand);
const defaultCommand = 'docker info --format "{{.DefaultRuntime}}"';
const { stdout: defaultRuntime } = serverId
? await execAsyncRemote(serverId, defaultCommand)
: await execAsync(defaultCommand);
const defaultCommand = 'docker info --format "{{.DefaultRuntime}}"';
const { stdout: defaultRuntime } = serverId
? await execAsyncRemote(serverId, defaultCommand)
: await execAsync(defaultCommand);
runtimeConfigured = defaultRuntime.trim() === "nvidia";
const runtimes = JSON.parse(runtimeInfo);
const hasNvidiaRuntime = "nvidia" in runtimes;
const isDefaultRuntime = defaultRuntime.trim() === "nvidia";
// Only set runtimeConfigured if both conditions are met
runtimeConfigured = hasNvidiaRuntime && isDefaultRuntime;
} catch (error) {
console.debug("Runtime configuration check:", error);
}
} catch (error) {
console.debug("Runtime check:", error);
}
@@ -279,6 +297,7 @@ const setupRemoteServer = async (serverId: string, daemonConfig: any) => {
"sudo -n true",
`echo '${JSON.stringify(daemonConfig, null, 2)}' | sudo tee /etc/docker/daemon.json`,
"sudo mkdir -p /etc/nvidia-container-runtime",
'sudo sed -i "/swarm-resource/d" /etc/nvidia-container-runtime/config.toml',
'echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" | sudo tee -a /etc/nvidia-container-runtime/config.toml',
"sudo systemctl daemon-reload",
"sudo systemctl restart docker",
@@ -295,6 +314,7 @@ const setupLocalServer = async (daemonConfig: any) => {
`pkexec sh -c '
cp ${configFile} /etc/docker/daemon.json &&
mkdir -p /etc/nvidia-container-runtime &&
sed -i "/swarm-resource/d" /etc/nvidia-container-runtime/config.toml &&
echo "swarm-resource = \\"DOCKER_RESOURCE_GPU\\"" >> /etc/nvidia-container-runtime/config.toml &&
systemctl daemon-reload &&
systemctl restart docker