mirror of https://github.com/open-webui/llama-cpp-runner (synced 2025-05-14 08:40:55 +00:00)
New Deployment
Added Dockerfile and compose.yaml as an option for deployment. Updated README.md
This commit is contained in:
parent 40c5cc4b0f
commit f7ecd272b2
115 Dockerfile Normal file
@@ -0,0 +1,115 @@
FROM python:3.11-slim

WORKDIR /app

# Install essential packages
RUN apt-get update && apt-get install -y \
    curl \
    wget \
    git \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy the project files
COPY . /app/

# Install the package
RUN pip install --no-cache-dir -e .
RUN pip install requests

# Create a volume mount point for models and cache
VOLUME /models
VOLUME /cache

# Create a proxy server script
RUN echo 'import os\n\
import uvicorn\n\
from fastapi import FastAPI, Request\n\
from fastapi.responses import StreamingResponse, JSONResponse\n\
from llama_cpp_runner.main import LlamaCpp\n\
\n\
app = FastAPI(title="LlamaCpp Proxy")\n\
\n\
# Initialize the LlamaCpp class\n\
models_dir = os.environ.get("MODELS_DIR", "/models")\n\
cache_dir = os.environ.get("CACHE_DIR", "/cache")\n\
verbose = os.environ.get("VERBOSE", "true").lower() == "true"\n\
timeout = int(os.environ.get("TIMEOUT_MINUTES", "30"))\n\
\n\
print(f"Models directory: {models_dir}")\n\
print(f"Cache directory: {cache_dir}")\n\
\n\
# Create the LlamaCpp instance\n\
llama_runner = LlamaCpp(\n\
    models_dir=models_dir,\n\
    cache_dir=cache_dir,\n\
    verbose=verbose,\n\
    timeout_minutes=timeout\n\
)\n\
\n\
@app.get("/")\n\
def read_root():\n\
    """Get server status and list of available models."""\n\
    return {"status": "running", "models": llama_runner.list_models()}\n\
\n\
@app.post("/v1/chat/completions")\n\
async def chat_completions(request: Request):\n\
    """Forward chat completion requests to the LlamaCpp server."""\n\
    try:\n\
        body = await request.json()\n\
\n\
        if "model" not in body:\n\
            return JSONResponse(\n\
                status_code=400,\n\
                content={"error": "Model not specified in request"}\n\
            )\n\
\n\
        try:\n\
            result = llama_runner.chat_completion(body)\n\
\n\
            # Handle streaming responses\n\
            if body.get("stream", False):\n\
                async def generate():\n\
                    for line in result:\n\
                        if line:\n\
                            yield f"data: {line}\\n\\n"\n\
                    yield "data: [DONE]\\n\\n"\n\
\n\
                return StreamingResponse(generate(), media_type="text/event-stream")\n\
            else:\n\
                return result\n\
        except Exception as e:\n\
            return JSONResponse(\n\
                status_code=500,\n\
                content={"error": str(e)}\n\
            )\n\
    except Exception as e:\n\
        return JSONResponse(\n\
            status_code=400,\n\
            content={"error": f"Invalid request: {str(e)}"}\n\
        )\n\
\n\
@app.get("/models")\n\
def list_models():\n\
    """List all available models."""\n\
    return {"models": llama_runner.list_models()}\n\
\n\
if __name__ == "__main__":\n\
    print("Starting LlamaCpp Proxy Server on port 3636")\n\
    models = llama_runner.list_models()\n\
    print(f"Available models: {models}")\n\
    if not models:\n\
        print("WARNING: No models found in the models directory.")\n\
    uvicorn.run(app, host="0.0.0.0", port=3636)\n' > /app/proxy_server.py

# Install FastAPI and Uvicorn
RUN pip install --no-cache-dir fastapi uvicorn

# Expose the proxy server port
EXPOSE 3636

# Set environment variables
ENV PYTHONUNBUFFERED=1

# Command to run when the container starts
CMD ["python", "/app/proxy_server.py"]
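Once a container built from this Dockerfile is running, the embedded proxy listens on port 3636 and accepts OpenAI-style chat completion requests. Below is a minimal sketch of a non-streaming call with `requests`, assuming a hypothetical model file named `MyModel.gguf` has been placed in the mounted models directory:

```python
import requests

# "MyModel.gguf" is a hypothetical filename; use a GGUF file that actually
# exists in the mounted /models volume (the /models endpoint lists them).
payload = {
    "model": "MyModel.gguf",
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,
}

# proxy_server.py serves /v1/chat/completions on port 3636.
response = requests.post("http://localhost:3636/v1/chat/completions", json=payload)
response.raise_for_status()
print(response.json())
```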
14 README.md
@@ -25,6 +25,20 @@ Installing `llama-cpp-runner` is quick and easy! Just use pip:
pip install llama-cpp-runner
```

## Optional Installation (Docker)

Clone the repository

```bash
git clone https://github.com/open-webui/llama-cpp-runner
```

Build and run

```bash
docker compose up -d
```
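After `docker compose up -d`, the proxy's root endpoint can serve as a quick sanity check. A small sketch, assuming the container is reachable on localhost:

```python
import requests

# GET / returns {"status": "running", "models": [...]} as defined in proxy_server.py.
info = requests.get("http://localhost:3636/").json()
print("Status:", info["status"])
print("Available models:", info["models"])
```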
## Usage 📖

### Initialize the Runner
15 compose.yaml Normal file
@@ -0,0 +1,15 @@
services:
  llama-cpp-runner:
    build: .
    container_name: owui-llama-cpp-runner
    ports:
      - "3636:3636"
    volumes:
      - ./models:/models
      - ./cache:/cache
    environment:
      - MODELS_DIR=/models
      - CACHE_DIR=/cache
      - VERBOSE=true
      - TIMEOUT_MINUTES=30
    restart: unless-stopped
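Because the proxy forwards streaming responses as server-sent events, a client can consume tokens as they arrive. A minimal sketch with `requests`, again assuming a hypothetical `MyModel.gguf` inside the `./models` volume mounted by this compose file:

```python
import requests

payload = {
    "model": "MyModel.gguf",  # hypothetical filename; replace with a real GGUF in ./models
    "messages": [{"role": "user", "content": "Tell me a joke."}],
    "stream": True,
}

# The proxy emits "data: ..." lines and a final "data: [DONE]" sentinel.
with requests.post("http://localhost:3636/v1/chat/completions", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for line in resp.iter_lines(decode_unicode=True):
        if not line:
            continue
        if line == "data: [DONE]":
            break
        print(line.removeprefix("data: "))
```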