From 79e2046a28d4ef93b94d44e8963064ba17ea9019 Mon Sep 17 00:00:00 2001
From: Timothy Jaeryang Baek
Date: Mon, 27 Jan 2025 18:21:36 -0800
Subject: [PATCH] uv init

---
 .python-version |   1 +
 README.md       |   0
 hello.py        |   6 +
 main.py         | 348 ++++++++++++++++++++++++++++--------------------
 pyproject.toml  |   7 +
 5 files changed, 215 insertions(+), 147 deletions(-)
 create mode 100644 .python-version
 create mode 100644 README.md
 create mode 100644 hello.py
 create mode 100644 pyproject.toml

diff --git a/.python-version b/.python-version
new file mode 100644
index 0000000..2c07333
--- /dev/null
+++ b/.python-version
@@ -0,0 +1 @@
+3.11
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/hello.py b/hello.py
new file mode 100644
index 0000000..f19c9e2
--- /dev/null
+++ b/hello.py
@@ -0,0 +1,6 @@
+def main():
+    print("Hello from llama-cpp-runner!")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/main.py b/main.py
index 78cec40..8e03a7d 100644
--- a/main.py
+++ b/main.py
@@ -2,168 +2,222 @@ import os
 import platform
 import requests
 import zipfile
-import argparse
 import json
-from packaging import version
+import subprocess
+import threading
 import stat
+import time
+import socket
 
 
-def log(message, verbose=False):
-    if verbose:
-        print(message)
+class LlamaCppServer:
+    def __init__(
+        self, llama_cpp_path=None, gguf_path=None, cache_dir="./cache", verbose=False
+    ):
+        self.verbose = verbose
+        self.cache_dir = cache_dir
+        self.llama_cpp_path = llama_cpp_path
+        self.gguf_path = gguf_path
+        self.server_process = None
+        self._server_url = None
+        self._server_thread = None
+        self.port = None
 
+        # Fetch or validate llama path
+        if llama_cpp_path is None:
+            self.llama_cpp_path = self._install_llama_cpp_binaries()
+        elif not os.path.exists(llama_cpp_path):
+            raise FileNotFoundError(
+                f"Specified llama_cpp_path not found: {llama_cpp_path}"
+            )
 
-def get_latest_release(verbose=False):
-    api_url = "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest"
-    log(f"Fetching latest release info from: {api_url}", verbose)
-    response = requests.get(api_url)
-    if response.status_code == 200:
-        return response.json()
-    else:
-        raise Exception(
-            f"Failed to fetch release info. Status code: {response.status_code}"
+        # Start the server if gguf_path is provided
+        if gguf_path:
+            self._start_server_in_thread()
+
+    @property
+    def url(self):
+        """Return the URL where the server is running."""
+        if self._server_url is None:
+            raise ValueError(
+                "Server is not running. Start the server with a valid GGUF path."
+            )
+        return self._server_url
+
+    def kill(self):
+        """Kill the server process and clean up."""
+        if self.server_process and self.server_process.poll() is None:
+            self.server_process.terminate()
+            self.server_process.wait()
+            self.server_process = None
+            self._server_url = None
+            self.port = None
+            self._log("Llama server successfully killed.")
+        if self._server_thread and self._server_thread.is_alive():
+            self._server_thread.join()
+
+    def _start_server_in_thread(self):
+        """Start the server in a separate thread."""
+
+        def target():
+            try:
+                self._start_server()
+            except Exception as e:
+                self._log(f"Failed to start server: {e}")
+
+        self._server_thread = threading.Thread(target=target, daemon=True)
+        self._server_thread.start()
+
+    def _start_server(self):
+        """Start the llama-server."""
+        if not self.gguf_path or not os.path.exists(self.gguf_path):
+            raise ValueError(
+                f"GGUF model path is not specified or invalid: {self.gguf_path}"
+            )
+
+        server_binary = os.path.join(
+            self.llama_cpp_path, "build", "bin", "llama-server"
+        )
+        if not os.path.exists(server_binary):
+            raise FileNotFoundError(f"Server binary not found: {server_binary}")
+
+        # Ensure the binary is executable
+        self._set_executable(server_binary)
+
+        # Find an available port
+        self.port = self._find_available_port(start_port=10000)
+        if self.port is None:
+            raise RuntimeError("No available port found between 10000 and 11000.")
+
+        self._log(f"Starting server with binary: {server_binary}")
+        self._log(f"Using GGUF path: {self.gguf_path}")
+        self._log(f"Using port: {self.port}")
+
+        self.server_process = subprocess.Popen(
+            [server_binary, "-m", self.gguf_path, "--port", str(self.port)],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            universal_newlines=True,
         )
 
+        # Wait for the server to confirm it is ready by monitoring its output
+        self._server_url = None
+        for line in iter(self.server_process.stdout.readline, ""):
+            self._log(line.strip())
+            if "Listening on" in line:
+                self._server_url = f"http://localhost:{self.port}"
+                self._log(f"Server is now accessible at {self._server_url}")
+                break
 
-def get_appropriate_asset(assets, verbose=False):
-    system = platform.system().lower()
-    machine = platform.machine().lower()
-    processor = platform.processor()
-    log(f"System: {system}", verbose)
-    log(f"Machine: {machine}", verbose)
-    log(f"Processor: {processor}", verbose)
-    if system == "windows":
-        if "arm" in machine:
-            asset = next((a for a in assets if "win-arm64" in a["name"]), None)
-        else:
-            if "avx512" in processor:
-                asset = next((a for a in assets if "win-avx512-x64" in a["name"]), None)
-            elif "avx2" in processor:
-                asset = next((a for a in assets if "win-avx2-x64" in a["name"]), None)
-            elif "avx" in processor:
-                asset = next((a for a in assets if "win-avx-x64" in a["name"]), None)
-            else:
-                asset = next((a for a in assets if "win-noavx-x64" in a["name"]), None)
-    elif system == "darwin":
-        if "arm" in machine:
-            asset = next((a for a in assets if "macos-arm64" in a["name"]), None)
-        else:
-            asset = next((a for a in assets if "macos-x64" in a["name"]), None)
-    elif system == "linux":
-        asset = next((a for a in assets if "ubuntu-x64" in a["name"]), None)
-    else:
-        asset = None
-    log(f"Selected asset: {asset['name'] if asset else None}", verbose)
-    return asset
+        if not self._server_url:
+            raise RuntimeError("Failed to confirm server is running.")
 
+    def _find_available_port(self, start_port=10000, end_port=11000):
+        """Find an available port between `start_port` and `end_port`."""
+        for port in range(start_port, end_port):
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+                if sock.connect_ex(("localhost", port)) != 0:
+                    return port
+        return None
 
-def set_executable(file_path):
-    current_mode = os.stat(file_path).st_mode
-    os.chmod(file_path, current_mode | stat.S_IEXEC)
-
-
-def download_and_unzip(url, asset_name, cache_dir, verbose=False):
-    os.makedirs(cache_dir, exist_ok=True)
-    zip_path = os.path.join(cache_dir, asset_name)
-    log(f"Downloading from: {url}", verbose)
-    response = requests.get(url)
-    if response.status_code == 200:
-        with open(zip_path, "wb") as file:
-            file.write(response.content)
-        log(f"Downloaded: {asset_name}", verbose)
-        extract_dir = os.path.join(cache_dir, "llama_cpp")
-        with zipfile.ZipFile(zip_path, "r") as zip_ref:
-            zip_ref.extractall(extract_dir)
-        log(f"Extracted to: {extract_dir}", verbose)
-        # Set execute permissions for all extracted files
+    def _set_executable(self, file_path):
+        """Ensure the file at `file_path` is executable."""
         if platform.system() != "Windows":
-            for root, dirs, files in os.walk(extract_dir):
-                for file in files:
-                    file_path = os.path.join(root, file)
-                    set_executable(file_path)
-            log("Set execute permissions for extracted files", verbose)
-        else:
-            log("Skipping permission setting on Windows", verbose)
-        print(f"Successfully downloaded and extracted {asset_name}")
-        return True
-    else:
-        print(f"Failed to download {asset_name}")
-        return False
+            current_mode = os.stat(file_path).st_mode
+            os.chmod(
+                file_path, current_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
+            )
 
+    def _log(self, message):
+        """Print a log message if verbosity is enabled."""
+        if self.verbose:
+            print(f"[LlamaCppServer] {message}")
 
-def check_cache(release_info, asset, cache_dir, verbose=False):
-    cache_info_path = os.path.join(cache_dir, "cache_info.json")
-    if os.path.exists(cache_info_path):
-        with open(cache_info_path, "r") as f:
-            cache_info = json.load(f)
-        if (
-            cache_info.get("tag_name") == release_info["tag_name"]
-            and cache_info.get("asset_name") == asset["name"]
-        ):
-            log("Latest version already downloaded.", verbose)
-            return True
-    return False
-
-
-def update_cache_info(release_info, asset, cache_dir):
-    cache_info = {"tag_name": release_info["tag_name"], "asset_name": asset["name"]}
-    cache_info_path = os.path.join(cache_dir, "cache_info.json")
-    with open(cache_info_path, "w") as f:
-        json.dump(cache_info, f)
-
-
-def unzip_asset(asset_name, cache_dir, verbose=False):
-    zip_path = os.path.join(cache_dir, asset_name)
-    extract_dir = os.path.join(cache_dir, "llama_cpp")
-    if os.path.exists(zip_path):
-        with zipfile.ZipFile(zip_path, "r") as zip_ref:
-            zip_ref.extractall(extract_dir)
-        log(f"Extracted to: {extract_dir}", verbose)
-        # Set execute permissions for all extracted files
-        if platform.system() != "Windows":
-            for root, dirs, files in os.walk(extract_dir):
-                for file in files:
-                    file_path = os.path.join(root, file)
-                    set_executable(file_path)
-            log("Set execute permissions for extracted files", verbose)
-        else:
-            log("Skipping permission setting on Windows", verbose)
-        print(f"Successfully extracted {asset_name}")
-        return True
-    else:
-        print(f"Zip file not found: {asset_name}")
-        return False
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Download and extract llama.cpp binaries"
-    )
-    parser.add_argument(
-        "-v", "--verbose", action="store_true", help="Increase output verbosity"
-    )
-    parser.add_argument("-c", "--cache", default="./cache", help="Cache directory")
-    args = parser.parse_args()
-    try:
-        release_info = get_latest_release(args.verbose)
+    def _install_llama_cpp_binaries(self):
+        """Download and install llama.cpp binaries."""
+        self._log("Installing llama.cpp binaries...")
+        release_info = self._get_latest_release()
         assets = release_info["assets"]
-        appropriate_asset = get_appropriate_asset(assets, args.verbose)
-        if appropriate_asset:
-            asset_name = appropriate_asset["name"]
-            if check_cache(release_info, appropriate_asset, args.cache, args.verbose):
-                print("Latest version already downloaded. Extracting cached version.")
-                unzip_asset(asset_name, args.cache, args.verbose)
-            else:
-                download_url = appropriate_asset["browser_download_url"]
-                if download_and_unzip(
-                    download_url, asset_name, args.cache, args.verbose
-                ):
-                    update_cache_info(release_info, appropriate_asset, args.cache)
+        asset = self._get_appropriate_asset(assets)
+        if not asset:
+            raise RuntimeError("No appropriate binary found for your system.")
+        asset_name = asset["name"]
+        if self._check_cache(release_info, asset):
+            self._log("Using cached llama.cpp binaries.")
         else:
-            print("No appropriate binary found for your system.")
-    except Exception as e:
-        print(f"An error occurred: {str(e)}")
+            self._download_and_unzip(asset["browser_download_url"], asset_name)
+            self._update_cache_info(release_info, asset)
+        return os.path.join(self.cache_dir, "llama_cpp")
 
+    def _get_latest_release(self):
+        """Fetch the latest release of llama.cpp from GitHub."""
+        api_url = "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest"
+        response = requests.get(api_url)
+        if response.status_code == 200:
+            return response.json()
+        else:
+            raise RuntimeError(
+                f"Failed to fetch release info. Status code: {response.status_code}"
+            )
 
-if __name__ == "__main__":
-    main()
+    def _get_appropriate_asset(self, assets):
+        """Select the appropriate binary asset for the current system."""
+        system = platform.system().lower()
+        machine = platform.machine().lower()
+        processor = platform.processor()
+        if system == "windows":
+            if "arm" in machine:
+                return next((a for a in assets if "win-arm64" in a["name"]), None)
+            elif "avx512" in processor:
+                return next((a for a in assets if "win-avx512-x64" in a["name"]), None)
+            elif "avx2" in processor:
+                return next((a for a in assets if "win-avx2-x64" in a["name"]), None)
+            elif "avx" in processor:
+                return next((a for a in assets if "win-avx-x64" in a["name"]), None)
+            else:
+                return next((a for a in assets if "win-noavx-x64" in a["name"]), None)
+        elif system == "darwin":
+            if "arm" in machine:
+                return next((a for a in assets if "macos-arm64" in a["name"]), None)
+            else:
+                return next((a for a in assets if "macos-x64" in a["name"]), None)
+        elif system == "linux":
+            return next((a for a in assets if "ubuntu-x64" in a["name"]), None)
+        return None
+
+    def _check_cache(self, release_info, asset):
+        """Check whether the latest binaries are already cached."""
+        cache_info_path = os.path.join(self.cache_dir, "cache_info.json")
+        if os.path.exists(cache_info_path):
+            with open(cache_info_path, "r") as f:
+                cache_info = json.load(f)
+            if (
+                cache_info.get("tag_name") == release_info["tag_name"]
+                and cache_info.get("asset_name") == asset["name"]
+            ):
+                return True
+        return False
+
+    def _download_and_unzip(self, url, asset_name):
+        """Download and extract llama.cpp binaries."""
+        os.makedirs(self.cache_dir, exist_ok=True)
+        zip_path = os.path.join(self.cache_dir, asset_name)
+        self._log(f"Downloading binary from: {url}")
+        response = requests.get(url)
+        if response.status_code == 200:
+            with open(zip_path, "wb") as file:
+                file.write(response.content)
+            self._log(f"Successfully downloaded: {asset_name}")
+        else:
+            raise RuntimeError(f"Failed to download binary: {url}")
+        extract_dir = os.path.join(self.cache_dir, "llama_cpp")
+        with zipfile.ZipFile(zip_path, "r") as zip_ref:
+            zip_ref.extractall(extract_dir)
+        self._log(f"Extracted binaries to: {extract_dir}")
+
+    def _update_cache_info(self, release_info, asset):
+        """Update cache metadata with the downloaded release info."""
+        cache_info = {"tag_name": release_info["tag_name"], "asset_name": asset["name"]}
+        cache_info_path = os.path.join(self.cache_dir, "cache_info.json")
+        with open(cache_info_path, "w") as f:
+            json.dump(cache_info, f)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..ac98692
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,7 @@
+[project]
+name = "llama-cpp-runner"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = []