diff --git a/pyproject.toml b/pyproject.toml
index ec386f8..46c1800 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "llama-cpp-runner"
 version = "0.0.1"
-description = "A runner for llama-cpp"
+description = "Quick and easy way to run large language models (LLMs) with llama.cpp"
 readme = "README.md"
 authors = [
     { name = "Timothy Jaeryang Baek", email = "tim@openwebui.com" }
diff --git a/src/llama_cpp_runner/main.py b/src/llama_cpp_runner/main.py
index cb98903..da0d819 100644
--- a/src/llama_cpp_runner/main.py
+++ b/src/llama_cpp_runner/main.py
@@ -9,27 +9,41 @@
 import stat
 import time
 import socket
+import os
+import platform
+import requests
+import zipfile
+import json
+
 
 class LlamaCpp:
     def __init__(
-        self, models_dir, cache_dir="./cache", verbose=False, timeout_minutes=5
+        self,
+        models_dir,
+        cache_dir="~/.llama_cpp_runner",
+        verbose=False,
+        timeout_minutes=5,
+        pinned_version=None,
     ):
         """
         Initialize the LlamaCpp class.
-
         Args:
             models_dir (str): Directory where GGUF models are stored.
-            cache_dir (str): Directory to store llama.cpp binaries and related assets.
+            cache_dir (str): Directory to store llama.cpp binaries and related assets. Defaults to '~/.llama_cpp_runner'.
             verbose (bool): Whether to enable verbose logging.
             timeout_minutes (int): Timeout for shutting down idle servers.
+            pinned_version (str or None): Pinned release version of llama.cpp binaries.
         """
         self.models_dir = models_dir
-        self.cache_dir = cache_dir
+        self.cache_dir = os.path.expanduser(
+            cache_dir
+        )  # Ensure cache is in a fixed location
         self.verbose = verbose
         self.timeout_minutes = timeout_minutes
+        self.pinned_version = pinned_version  # Optional pinned version
         self.llama_cpp_path = (
             self._install_llama_cpp_binaries()
-        )  # Handle binaries installation
+        )  # Install the required binaries
         self.servers = (
             {}
         )  # Maintain a mapping of model names to LlamaCppServer instances
@@ -37,7 +51,6 @@ class LlamaCpp:
     def list_models(self):
         """
         List all GGUF models available in the `models_dir`.
-
         Returns:
             list: A list of model names (files ending in ".gguf").
         """
@@ -51,37 +64,29 @@ class LlamaCpp:
     def chat_completion(self, body):
         """
         Handle chat completion requests.
-
         Args:
             body (dict): The payload for the chat completion request. It must contain the "model" key.
-
         Returns:
             dict or generator: Response from the server (non-streaming or streaming mode).
         """
         if "model" not in body:
             raise ValueError("The request body must contain a 'model' key.")
-
         model_name = body["model"]
         gguf_path = os.path.join(self.models_dir, model_name)
-
         if not os.path.exists(gguf_path):
             raise FileNotFoundError(f"Model file not found: {gguf_path}")
-
         # Check if the server for this model is already running
         if model_name not in self.servers or not self.servers[model_name]._server_url:
             self._log(f"Initializing a new server for model: {model_name}")
             self.servers[model_name] = self._create_server(gguf_path)
-
         server = self.servers[model_name]
         return server.chat_completion(body)
 
     def _create_server(self, gguf_path):
         """
         Create a new LlamaCppServer instance for the given model.
-
         Args:
             gguf_path (str): Path to the GGUF model file.
-
         Returns:
             LlamaCppServer: A new server instance.
         """
@@ -96,49 +101,63 @@ class LlamaCpp:
     def _install_llama_cpp_binaries(self):
         """
         Download and install llama.cpp binaries.
-
         Returns:
             str: Path to the installed llama.cpp binaries.
""" self._log("Installing llama.cpp binaries...") - release_info = self._get_latest_release() - assets = release_info["assets"] - asset = self._get_appropriate_asset(assets) - if not asset: - raise RuntimeError("No appropriate binary found for your system.") + try: + # Use pinned version if provided, otherwise fetch the latest release + release_info = self._get_release_info() + assets = release_info["assets"] + asset = self._get_appropriate_asset(assets) + if not asset: + raise RuntimeError("No appropriate binary found for your system.") + asset_name = asset["name"] - asset_name = asset["name"] - if self._check_cache(release_info, asset): - self._log("Using cached llama.cpp binaries.") - else: - self._download_and_unzip(asset["browser_download_url"], asset_name) - self._update_cache_info(release_info, asset) + # Check if cached binaries match the required version + if self._check_cache(release_info, asset): + self._log("Using cached llama.cpp binaries.") + else: + if not self._internet_available(): + raise RuntimeError( + "No cached binary available and unable to fetch from the internet." + ) + self._download_and_unzip(asset["browser_download_url"], asset_name) + self._update_cache_info(release_info, asset) + + except Exception as e: + self._log(f"Error during binary installation: {e}") + raise return os.path.join(self.cache_dir, "llama_cpp") - def _get_latest_release(self): + def _get_release_info(self): """ - Fetch the latest release of llama.cpp from GitHub. - + Fetch metadata of the specified release (pinned or latest) from GitHub. Returns: dict: Release information. """ - api_url = "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest" + if self.pinned_version: + api_url = f"https://api.github.com/repos/ggerganov/llama.cpp/releases/tags/{self.pinned_version}" + else: + api_url = "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest" + + if not self._internet_available(): + # Fall back to cache if no internet access + raise RuntimeError("No internet access and no cached version available.") + response = requests.get(api_url) if response.status_code == 200: return response.json() else: - raise RuntimeError( - f"Failed to fetch release info. Status code: {response.status_code}" - ) + error_reason = f"Failed to fetch release info: HTTP {response.status_code}" + raise RuntimeError(error_reason) def _get_appropriate_asset(self, assets): """ Select the appropriate binary asset for the current system. - Args: assets (list): List of asset metadata from the release. - Returns: dict or None: Matching asset metadata, or None if no match found. """ @@ -168,13 +187,11 @@ class LlamaCpp: def _check_cache(self, release_info, asset): """ Check whether the latest binaries are already cached. - Args: release_info (dict): Metadata of the latest release. asset (dict): Metadata of the selected asset. - Returns: - bool: True if the cached binary matches the latest release, False otherwise. + bool: True if the cached binary matches the required release, False otherwise. """ cache_info_path = os.path.join(self.cache_dir, "cache_info.json") if os.path.exists(cache_info_path): @@ -190,7 +207,6 @@ class LlamaCpp: def _download_and_unzip(self, url, asset_name): """ Download and extract llama.cpp binaries. - Args: url (str): URL of the asset to download. asset_name (str): Name of the asset file. 
@@ -205,7 +221,6 @@ class LlamaCpp:
             self._log(f"Successfully downloaded: {asset_name}")
         else:
             raise RuntimeError(f"Failed to download binary: {url}")
-
         extract_dir = os.path.join(self.cache_dir, "llama_cpp")
         with zipfile.ZipFile(zip_path, "r") as zip_ref:
             zip_ref.extractall(extract_dir)
@@ -214,7 +229,6 @@ class LlamaCpp:
     def _update_cache_info(self, release_info, asset):
         """
         Update cache metadata with the downloaded release info.
-
         Args:
             release_info (dict): Metadata of the latest release.
             asset (dict): Metadata of the downloaded asset.
@@ -224,10 +238,21 @@ class LlamaCpp:
         with open(cache_info_path, "w") as f:
             json.dump(cache_info, f)
 
+    def _internet_available(self):
+        """
+        Check for internet connectivity.
+        Returns:
+            bool: True if the internet is accessible, False otherwise.
+        """
+        try:
+            requests.get("https://api.github.com", timeout=3)
+            return True
+        except requests.ConnectionError:
+            return False
+
     def _log(self, message):
         """
         Print a log message if verbosity is enabled.
-
         Args:
             message (str): Log message to print.
         """
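
For reviewers, a minimal usage sketch of the constructor surface this patch introduces (the `cache_dir` default of `~/.llama_cpp_runner` and the new `pinned_version` argument). The models directory, release tag, model file name, and request payload below are hypothetical placeholders, not values from this patch; only the parameter and method names come from the code above.

```python
from llama_cpp_runner.main import LlamaCpp

llama = LlamaCpp(
    models_dir="/path/to/gguf/models",  # hypothetical directory containing *.gguf files
    pinned_version="b4604",             # hypothetical llama.cpp release tag to pin
    verbose=True,
)

print(llama.list_models())  # lists files ending in ".gguf"

# chat_completion() requires a "model" key naming a file inside models_dir;
# the rest of the body is forwarded to the llama.cpp server for that model.
response = llama.chat_completion(
    {
        "model": "example-model.gguf",  # hypothetical file name
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": False,
    }
)
print(response)
```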
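The release lookup in `_get_release_info` resolves a pinned tag via the GitHub `releases/tags/<tag>` endpoint and otherwise queries `releases/latest`. Below is a standalone sketch of that lookup, handy for checking which binary assets a given tag ships; the tag value is a hypothetical example.

```python
import requests

pinned_version = "b4604"  # hypothetical tag; set to None to query the latest release

url = (
    f"https://api.github.com/repos/ggerganov/llama.cpp/releases/tags/{pinned_version}"
    if pinned_version
    else "https://api.github.com/repos/ggerganov/llama.cpp/releases/latest"
)

response = requests.get(url, timeout=10)
response.raise_for_status()
release = response.json()

print(release["tag_name"])
# Asset names are what _get_appropriate_asset() matches against the host platform.
print([asset["name"] for asset in release["assets"]])
```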