This commit is contained in:
Timothy Jaeryang Baek 2025-01-28 13:05:16 -08:00
parent 84b949bff3
commit 2b057ff73e

View File

@ -50,9 +50,13 @@ class LlamaCppServer:
def url(self):
    """Return the URL where the server is running.

    If no URL is recorded yet, the server is (re)started via
    ``_start_server_in_thread`` and ``_start_auto_terminate_thread`` and
    this call blocks until ``_server_url`` is populated.

    The wait is bounded: an optional ``startup_timeout`` attribute on the
    instance (seconds, default 300) caps how long the call blocks, so a
    failed start-up surfaces as an error instead of hanging the caller
    forever.

    Returns:
        The value of ``_server_url`` once it is available.

    Raises:
        TimeoutError: If ``_server_url`` is still ``None`` when the
            start-up deadline expires.
    """
    if self._server_url is None:
        # If the server URL is not available, ensure the server spins up again
        self._log("Server is off. Restarting the server...")
        self._start_server_in_thread()
        self._start_auto_terminate_thread()

        # Wait for the thread to start the server, but never indefinitely:
        # if start-up fails in the background, _server_url stays None.
        timeout = getattr(self, "startup_timeout", 300.0)
        # monotonic() is immune to wall-clock adjustments during the wait.
        deadline = time.monotonic() + timeout
        while self._server_url is None:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                raise TimeoutError(
                    f"Server did not start within {timeout} seconds."
                )
            # Poll once per second, without overshooting the deadline.
            time.sleep(min(1.0, remaining))

    # Update the last-used timestamp whenever this property is accessed
    self.last_used = time.time()
    return self._server_url
@ -74,17 +78,19 @@ class LlamaCppServer:
def chat_completion(self, payload): def chat_completion(self, payload):
"""Send a chat completion request to the server.""" """Send a chat completion request to the server."""
if self._server_url is None: if self._server_url is None:
raise RuntimeError( self._log(
"Server is not running. Start the server before making requests." "Server is off. Restarting the server before making the request..."
) )
self._start_server_in_thread()
self._start_auto_terminate_thread()
# Wait for the thread to start the server
while self._server_url is None:
time.sleep(1)
# Reset the last-used timestamp # Reset the last-used timestamp
self.last_used = time.time() self.last_used = time.time()
endpoint = f"{self._server_url}/v1/chat/completions" endpoint = f"{self._server_url}/v1/chat/completions"
self._log(f"Sending chat completion request to {endpoint}...") self._log(f"Sending chat completion request to {endpoint}...")
response = requests.post(endpoint, json=payload) response = requests.post(endpoint, json=payload)
if response.status_code == 200: if response.status_code == 200:
self._log("Request successful.") self._log("Request successful.")
return response.json() return response.json()
@ -136,16 +142,13 @@ class LlamaCppServer:
raise ValueError( raise ValueError(
f"GGUF model path is not specified or invalid: {self.gguf_path}" f"GGUF model path is not specified or invalid: {self.gguf_path}"
) )
server_binary = os.path.join( server_binary = os.path.join(
self.llama_cpp_path, "build", "bin", "llama-server" self.llama_cpp_path, "build", "bin", "llama-server"
) )
if not os.path.exists(server_binary): if not os.path.exists(server_binary):
raise FileNotFoundError(f"Server binary not found: {server_binary}") raise FileNotFoundError(f"Server binary not found: {server_binary}")
# Ensure the binary is executable # Ensure the binary is executable
self._set_executable(server_binary) self._set_executable(server_binary)
# Find an available port # Find an available port
self.port = self._find_available_port(start_port=10000) self.port = self._find_available_port(start_port=10000)
if self.port is None: if self.port is None: