From 2b057ff73e99cb4a7fb33a0c8536f0377dc32bd7 Mon Sep 17 00:00:00 2001
From: Timothy Jaeryang Baek
Date: Tue, 28 Jan 2025 13:05:16 -0800
Subject: [PATCH] refac

---
 src/llama_cpp_runner/main.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/llama_cpp_runner/main.py b/src/llama_cpp_runner/main.py
index 6ab7b8b..73961e1 100644
--- a/src/llama_cpp_runner/main.py
+++ b/src/llama_cpp_runner/main.py
@@ -50,9 +50,13 @@ class LlamaCppServer:
     def url(self):
         """Return the URL where the server is running."""
         if self._server_url is None:
-            raise ValueError(
-                "Server is not running. Start the server with a valid GGUF path."
-            )
+            # If the server URL is not available, ensure the server spins up again
+            self._log("Server is off. Restarting the server...")
+            self._start_server_in_thread()
+            self._start_auto_terminate_thread()
+            # Wait for the thread to start the server
+            while self._server_url is None:
+                time.sleep(1)
         # Update the last-used timestamp whenever this property is accessed
         self.last_used = time.time()
         return self._server_url
@@ -74,17 +78,19 @@ class LlamaCppServer:
     def chat_completion(self, payload):
         """Send a chat completion request to the server."""
         if self._server_url is None:
-            raise RuntimeError(
-                "Server is not running. Start the server before making requests."
+            self._log(
+                "Server is off. Restarting the server before making the request..."
             )
-
+            self._start_server_in_thread()
+            self._start_auto_terminate_thread()
+            # Wait for the thread to start the server
+            while self._server_url is None:
+                time.sleep(1)
         # Reset the last-used timestamp
         self.last_used = time.time()
-
         endpoint = f"{self._server_url}/v1/chat/completions"
         self._log(f"Sending chat completion request to {endpoint}...")
         response = requests.post(endpoint, json=payload)
-
         if response.status_code == 200:
             self._log("Request successful.")
             return response.json()
@@ -136,16 +142,13 @@ class LlamaCppServer:
             raise ValueError(
                 f"GGUF model path is not specified or invalid: {self.gguf_path}"
             )
-
         server_binary = os.path.join(
             self.llama_cpp_path, "build", "bin", "llama-server"
         )
         if not os.path.exists(server_binary):
             raise FileNotFoundError(f"Server binary not found: {server_binary}")
-
         # Ensure the binary is executable
         self._set_executable(server_binary)
-
         # Find an available port
         self.port = self._find_available_port(start_port=10000)
         if self.port is None:
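
Note for reviewers (not part of the patch): a minimal usage sketch of the behavior
this change introduces. Accessing url or calling chat_completion() on a stopped
server now restarts it transparently instead of raising. The constructor parameter
names below (llama_cpp_path, gguf_path) are assumptions inferred from the
attributes referenced in the diff, not a confirmed signature.

    # Sketch only: constructor arguments are assumed, inferred from
    # self.llama_cpp_path and self.gguf_path in the diff.
    from llama_cpp_runner.main import LlamaCppServer

    server = LlamaCppServer(
        llama_cpp_path="/path/to/llama.cpp",  # assumed parameter name
        gguf_path="/path/to/model.gguf",      # assumed parameter name
    )

    # Before this patch, these calls raised ValueError/RuntimeError once the
    # auto-terminate thread had shut the server down. After it, both block
    # (polling once per second) until the server is back up.
    print(server.url)

    result = server.chat_completion(
        {"messages": [{"role": "user", "content": "Hello!"}]}
    )
    print(result)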