From f7ecd272b2db2b3f7b40d97f694976a0464de059 Mon Sep 17 00:00:00 2001
From: pkeffect
Date: Fri, 18 Apr 2025 16:03:20 -0400
Subject: [PATCH] New Deployment

Added Dockerfile and compose.yaml as an option for deployment.
Updated README.md
---
 Dockerfile   | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++
 README.md    |  14 +++++++
 compose.yaml |  15 +++++++
 3 files changed, 144 insertions(+)
 create mode 100644 Dockerfile
 create mode 100644 compose.yaml

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..107c304
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,115 @@
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# Install essential packages
+RUN apt-get update && apt-get install -y \
+    curl \
+    wget \
+    git \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy the project files
+COPY . /app/
+
+# Install the package
+RUN pip install --no-cache-dir -e .
+RUN pip install requests
+
+# Create a volume mount point for models and cache
+VOLUME /models
+VOLUME /cache
+
+# Create a proxy server script
+RUN echo 'import os\n\
+import uvicorn\n\
+from fastapi import FastAPI, Request\n\
+from fastapi.responses import StreamingResponse, JSONResponse\n\
+from llama_cpp_runner.main import LlamaCpp\n\
+\n\
+app = FastAPI(title="LlamaCpp Proxy")\n\
+\n\
+# Initialize the LlamaCpp class\n\
+models_dir = os.environ.get("MODELS_DIR", "/models")\n\
+cache_dir = os.environ.get("CACHE_DIR", "/cache")\n\
+verbose = os.environ.get("VERBOSE", "true").lower() == "true"\n\
+timeout = int(os.environ.get("TIMEOUT_MINUTES", "30"))\n\
+\n\
+print(f"Models directory: {models_dir}")\n\
+print(f"Cache directory: {cache_dir}")\n\
+\n\
+# Create the LlamaCpp instance\n\
+llama_runner = LlamaCpp(\n\
+    models_dir=models_dir,\n\
+    cache_dir=cache_dir, \n\
+    verbose=verbose, \n\
+    timeout_minutes=timeout\n\
+)\n\
+\n\
+@app.get("/")\n\
+def read_root():\n\
+    """Get server status and list of available models."""\n\
+    return {"status": "running", "models": llama_runner.list_models()}\n\
+\n\
+@app.post("/v1/chat/completions")\n\
+async def chat_completions(request: Request):\n\
+    """Forward chat completion requests to the LlamaCpp server."""\n\
+    try:\n\
+        body = await request.json()\n\
+        \n\
+        if "model" not in body:\n\
+            return JSONResponse(\n\
+                status_code=400,\n\
+                content={"error": "Model not specified in request"}\n\
+            )\n\
+        \n\
+        try:\n\
+            result = llama_runner.chat_completion(body)\n\
+            \n\
+            # Handle streaming responses\n\
+            if body.get("stream", False):\n\
+                async def generate():\n\
+                    for line in result:\n\
+                        if line:\n\
+                            yield f"data: {line}\\n\\n"\n\
+                    yield "data: [DONE]\\n\\n"\n\
+                \n\
+                return StreamingResponse(generate(), media_type="text/event-stream")\n\
+            else:\n\
+                return result\n\
+        except Exception as e:\n\
+            return JSONResponse(\n\
+                status_code=500,\n\
+                content={"error": str(e)}\n\
+            )\n\
+    except Exception as e:\n\
+        return JSONResponse(\n\
+            status_code=400,\n\
+            content={"error": f"Invalid request: {str(e)}"}\n\
+        )\n\
+\n\
+@app.get("/models")\n\
+def list_models():\n\
+    """List all available models."""\n\
+    return {"models": llama_runner.list_models()}\n\
+\n\
+if __name__ == "__main__":\n\
+    print("Starting LlamaCpp Proxy Server on port 3636")\n\
+    models = llama_runner.list_models()\n\
+    print(f"Available models: {models}")\n\
+    if not models:\n\
+        print("WARNING: No models found in the models directory.")\n\
+    uvicorn.run(app, host="0.0.0.0", port=3636)\n' > /app/proxy_server.py
+
+# Install FastAPI and Uvicorn
+RUN pip install --no-cache-dir fastapi uvicorn
+
+# Expose the proxy server port
+EXPOSE 3636
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+
+# Command to run when the container starts
+CMD ["python", "/app/proxy_server.py"]
\ No newline at end of file
diff --git a/README.md b/README.md
index f9e6173..88f425f 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,20 @@ Installing `llama-cpp-runner` is quick and easy! Just use pip:
 pip install llama-cpp-runner
 ```
 
+## Optional Installation (Docker)
+
+Clone the repository
+
+```bash
+git clone https://github.com/open-webui/llama-cpp-runner
+```
+
+Build and run
+
+```bash
+docker compose up -d
+```
+
 ## Usage 📖
 
 ### Initialize the Runner
diff --git a/compose.yaml b/compose.yaml
new file mode 100644
index 0000000..8a553ff
--- /dev/null
+++ b/compose.yaml
@@ -0,0 +1,15 @@
+services:
+  llama-cpp-runner:
+    build: .
+    container_name: owui-llama-cpp-runner
+    ports:
+      - "3636:3636"
+    volumes:
+      - ./models:/models
+      - ./cache:/cache
+    environment:
+      - MODELS_DIR=/models
+      - CACHE_DIR=/cache
+      - VERBOSE=true
+      - TIMEOUT_MINUTES=30
+    restart: unless-stopped
\ No newline at end of file
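Once `docker compose up -d` has the container running, the proxy added by this patch can be smoke-tested against the endpoints defined in `proxy_server.py` (`/`, `/models`, and `/v1/chat/completions` on port 3636). A minimal sketch using the `requests` package that the Dockerfile installs; it assumes at least one GGUF model has been placed in `./models`, and `your-model.gguf` is a placeholder for a name actually returned by `GET /models`:

```python
# Minimal smoke test for the proxy exposed by compose.yaml (port 3636).
# Assumes the container is running and ./models contains at least one model;
# "your-model.gguf" is a placeholder -- use a name returned by GET /models.
import requests

base = "http://localhost:3636"

# Server status and the list of available models
print(requests.get(f"{base}/").json())
print(requests.get(f"{base}/models").json())

# Non-streaming chat completion forwarded to the llama.cpp server
resp = requests.post(
    f"{base}/v1/chat/completions",
    json={
        "model": "your-model.gguf",  # placeholder; must match a listed model
        "messages": [{"role": "user", "content": "Hello!"}],
        "stream": False,
    },
)
print(resp.json())
```

With `"stream": True` the proxy instead returns Server-Sent Events and terminates the stream with `data: [DONE]`, as implemented in the `generate()` helper above.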