Timothy Jaeryang Baek 2025-01-27 18:21:36 -08:00
parent 55199b2827
commit 79e2046a28
5 changed files with 215 additions and 147 deletions

.python-version  (new file, 1 line)

@@ -0,0 +1 @@
3.11

README.md  (new file, empty)

hello.py  (new file, 6 lines)

@@ -0,0 +1,6 @@
def main():
    print("Hello from llama-cpp-runner!")


if __name__ == "__main__":
    main()

main.py  (348 lines changed)

@@ -2,168 +2,222 @@
The previous procedural downloader script — log(), get_latest_release(), get_appropriate_asset(), set_executable(), download_and_unzip(), check_cache(), update_cache_info(), unzip_asset(), main(), and the argparse / packaging imports — is removed. The new main.py reads:

import os
import platform
import requests
import zipfile
import json
import subprocess
import threading
import stat
import time
import socket


class LlamaCppServer:
    def __init__(
        self, llama_cpp_path=None, gguf_path=None, cache_dir="./cache", verbose=False
    ):
        self.verbose = verbose
        self.cache_dir = cache_dir
        self.llama_cpp_path = llama_cpp_path
        self.gguf_path = gguf_path
        self.server_process = None
        self._server_url = None
        self._server_thread = None
        self.port = None

        # Fetch or validate llama path
        if llama_cpp_path is None:
            self.llama_cpp_path = self._install_llama_cpp_binaries()
        elif not os.path.exists(llama_cpp_path):
            raise FileNotFoundError(
                f"Specified llama_cpp_path not found: {llama_cpp_path}"
            )

        # Start the server if gguf_path is provided
        if gguf_path:
            self._start_server_in_thread()

    @property
    def url(self):
        """Return the URL where the server is running."""
        if self._server_url is None:
            raise ValueError(
                "Server is not running. Start the server with a valid GGUF path."
            )
        return self._server_url

    def kill(self):
        """Kill the server process and clean up."""
        if self.server_process and self.server_process.poll() is None:
            self.server_process.terminate()
            self.server_process.wait()
            self.server_process = None
            self._server_url = None
            self.port = None
            self._log("Llama server successfully killed.")
        if self._server_thread and self._server_thread.is_alive():
            self._server_thread.join()

    def _start_server_in_thread(self):
        """Start the server in a separate thread."""

        def target():
            try:
                self._start_server()
            except Exception as e:
                self._log(f"Failed to start server: {e}")

        self._server_thread = threading.Thread(target=target, daemon=True)
        self._server_thread.start()

    def _start_server(self):
        """Start the llama-server."""
        if not self.gguf_path or not os.path.exists(self.gguf_path):
            raise ValueError(
                f"GGUF model path is not specified or invalid: {self.gguf_path}"
            )

        server_binary = os.path.join(
            self.llama_cpp_path, "build", "bin", "llama-server"
        )
        if not os.path.exists(server_binary):
            raise FileNotFoundError(f"Server binary not found: {server_binary}")

        # Ensure the binary is executable
        self._set_executable(server_binary)

        # Find an available port
        self.port = self._find_available_port(start_port=10000)
        if self.port is None:
            raise RuntimeError("No available port found between 10000 and 11000.")

        self._log(f"Starting server with binary: {server_binary}")
        self._log(f"Using GGUF path: {self.gguf_path}")
        self._log(f"Using port: {self.port}")

        self.server_process = subprocess.Popen(
            [server_binary, "-m", self.gguf_path, "--port", str(self.port)],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )

        # Wait for the server to confirm it is ready by monitoring its output
        self._server_url = None
        for line in iter(self.server_process.stdout.readline, ""):
            self._log(line.strip())
            if "Listening on" in line:
                self._server_url = f"http://localhost:{self.port}"
                self._log(f"Server is now accessible at {self._server_url}")
                break

        if not self._server_url:
            raise RuntimeError("Failed to confirm server is running.")

    def _find_available_port(self, start_port=10000, end_port=11000):
        """Find an available port between `start_port` and `end_port`."""
        for port in range(start_port, end_port):
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                if sock.connect_ex(("localhost", port)) != 0:
                    return port
        return None

    def _set_executable(self, file_path):
        """Ensure the file at `file_path` is executable."""
        if platform.system() != "Windows":
            current_mode = os.stat(file_path).st_mode
            os.chmod(
                file_path, current_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
            )

    def _log(self, message):
        """Print a log message if verbosity is enabled."""
        if self.verbose:
            print(f"[LlamaCppServer] {message}")

    def _install_llama_cpp_binaries(self):
        """Download and install llama.cpp binaries."""
        self._log("Installing llama.cpp binaries...")
        release_info = self._get_latest_release()
        assets = release_info["assets"]
        asset = self._get_appropriate_asset(assets)
        if not asset:
            raise RuntimeError("No appropriate binary found for your system.")
        asset_name = asset["name"]
        if self._check_cache(release_info, asset):
            self._log("Using cached llama.cpp binaries.")
        else:
            self._download_and_unzip(asset["browser_download_url"], asset_name)
            self._update_cache_info(release_info, asset)
        return os.path.join(self.cache_dir, "llama_cpp")

    def _get_latest_release(self):
        """Fetch the latest release of llama.cpp from GitHub."""
        api_url = "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest"
        response = requests.get(api_url)
        if response.status_code == 200:
            return response.json()
        else:
            raise RuntimeError(
                f"Failed to fetch release info. Status code: {response.status_code}"
            )

    def _get_appropriate_asset(self, assets):
        """Select the appropriate binary asset for the current system."""
        system = platform.system().lower()
        machine = platform.machine().lower()
        processor = platform.processor()
        if system == "windows":
            if "arm" in machine:
                return next((a for a in assets if "win-arm64" in a["name"]), None)
            elif "avx512" in processor:
                return next((a for a in assets if "win-avx512-x64" in a["name"]), None)
            elif "avx2" in processor:
                return next((a for a in assets if "win-avx2-x64" in a["name"]), None)
            elif "avx" in processor:
                return next((a for a in assets if "win-avx-x64" in a["name"]), None)
            else:
                return next((a for a in assets if "win-noavx-x64" in a["name"]), None)
        elif system == "darwin":
            if "arm" in machine:
                return next((a for a in assets if "macos-arm64" in a["name"]), None)
            else:
                return next((a for a in assets if "macos-x64" in a["name"]), None)
        elif system == "linux":
            return next((a for a in assets if "ubuntu-x64" in a["name"]), None)
        return None

    def _check_cache(self, release_info, asset):
        """Check whether the latest binaries are already cached."""
        cache_info_path = os.path.join(self.cache_dir, "cache_info.json")
        if os.path.exists(cache_info_path):
            with open(cache_info_path, "r") as f:
                cache_info = json.load(f)
            if (
                cache_info.get("tag_name") == release_info["tag_name"]
                and cache_info.get("asset_name") == asset["name"]
            ):
                return True
        return False

    def _download_and_unzip(self, url, asset_name):
        """Download and extract llama.cpp binaries."""
        os.makedirs(self.cache_dir, exist_ok=True)
        zip_path = os.path.join(self.cache_dir, asset_name)
        self._log(f"Downloading binary from: {url}")
        response = requests.get(url)
        if response.status_code == 200:
            with open(zip_path, "wb") as file:
                file.write(response.content)
            self._log(f"Successfully downloaded: {asset_name}")
        else:
            raise RuntimeError(f"Failed to download binary: {url}")
        extract_dir = os.path.join(self.cache_dir, "llama_cpp")
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            zip_ref.extractall(extract_dir)
        self._log(f"Extracted binaries to: {extract_dir}")

    def _update_cache_info(self, release_info, asset):
        """Update cache metadata with the downloaded release info."""
        cache_info = {"tag_name": release_info["tag_name"], "asset_name": asset["name"]}
        cache_info_path = os.path.join(self.cache_dir, "cache_info.json")
        with open(cache_info_path, "w") as f:
            json.dump(cache_info, f)
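For reference, a minimal usage sketch of the class above; the import path (main) and the model filename are assumptions, not part of this commit, and the class downloads the llama.cpp binaries on first use when llama_cpp_path is not given:

import time
from main import LlamaCppServer  # assumes main.py is importable as `main`

# "model.gguf" is a placeholder path; point it at a real GGUF file.
server = LlamaCppServer(gguf_path="model.gguf", verbose=True)

# The server boots in a background thread, so poll until .url stops raising.
for _ in range(60):
    try:
        print("llama-server is up at", server.url)
        break
    except ValueError:
        time.sleep(1)
else:
    raise SystemExit("server did not come up in time")

# ... talk to server.url over HTTP here ...

server.kill()  # terminate the subprocess and join the background thread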

pyproject.toml  (new file, 7 lines)

@@ -0,0 +1,7 @@
[project]
name = "llama-cpp-runner"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.11"
dependencies = []
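Note that main.py imports requests, a third-party package, while dependencies is still empty, so installation from this pyproject.toml alone would presumably fail at runtime. A sketch of the entry a later commit might add (the version bound is an assumption):

dependencies = [
    "requests>=2.31",
]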