From fd8f43a156a6b9fbf50d7128c1d58a0d16e152f0 Mon Sep 17 00:00:00 2001
From: pkeffect
Date: Fri, 18 Apr 2025 18:39:29 -0400
Subject: [PATCH] refactor and update

---
 .gitignore      |  7 +++--
 Dockerfile      | 28 ++++++++++--------
 compose.yaml    | 16 +++++++---
 proxy_server.py | 79 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 110 insertions(+), 20 deletions(-)
 create mode 100644 proxy_server.py

diff --git a/.gitignore b/.gitignore
index 823e384..631947d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
-llama_cpp_cache
-cache
-dist
\ No newline at end of file
+llama_cpp_cache/
+cache/
+dist/
+models/*
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 107c304..1f3d580 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,26 +2,27 @@ FROM python:3.11-slim
 
 WORKDIR /app
 
-# Install essential packages
-RUN apt-get update && apt-get install -y \
+# Install only essential packages and clean up in one layer to reduce image size
+RUN apt-get update && apt-get install -y --no-install-recommends \
     curl \
     wget \
     git \
    build-essential \
+    && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy the project files
-COPY . /app/
+# Copy only necessary files
+COPY pyproject.toml README.md LICENSE /app/
+COPY src/ /app/src/
 
-# Install the package
-RUN pip install --no-cache-dir -e .
-RUN pip install requests
+# Install the package in development mode and required dependencies
+RUN pip install --no-cache-dir -e . && pip install --no-cache-dir requests fastapi uvicorn
 
-# Create a volume mount point for models and cache
+# Create volume mount points
 VOLUME /models
 VOLUME /cache
 
-# Create a proxy server script
+# Create proxy server script directly in the Dockerfile
 RUN echo 'import os\n\
 import uvicorn\n\
 from fastapi import FastAPI, Request\n\
@@ -100,16 +101,17 @@ if __name__ == "__main__":\n\
     print(f"Available models: {models}")\n\
     if not models:\n\
         print("WARNING: No models found in the models directory.")\n\
-    uvicorn.run(app, host="0.0.0.0", port=3636)\n' > /app/proxy_server.py
-
-# Install FastAPI and Uvicorn
-RUN pip install --no-cache-dir fastapi uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=3636)' > /app/proxy_server.py
 
 # Expose the proxy server port
 EXPOSE 3636
 
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
+ENV MODELS_DIR=/models
+ENV CACHE_DIR=/cache
+ENV VERBOSE=true
+ENV TIMEOUT_MINUTES=30
 
 # Command to run when the container starts
 CMD ["python", "/app/proxy_server.py"]
\ No newline at end of file
diff --git a/compose.yaml b/compose.yaml
index 8a553ff..bf2ae02 100644
--- a/compose.yaml
+++ b/compose.yaml
@@ -1,15 +1,23 @@
 services:
-  llama-cpp-runner:
+  owui-llama-cpp-runner:
     build: .
     container_name: owui-llama-cpp-runner
     ports:
       - "3636:3636"
     volumes:
-      - ./models:/models
-      - ./cache:/cache
+      - ./models:/models # local mount
+      - ./cache:/cache # local mount
+      # Remove ./ from the paths above to use named Docker volumes
     environment:
       - MODELS_DIR=/models
       - CACHE_DIR=/cache
       - VERBOSE=true
       - TIMEOUT_MINUTES=30
-    restart: unless-stopped
\ No newline at end of file
+      - LD_LIBRARY_PATH=/cache/llama_cpp/build/bin
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:3636/"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
\ No newline at end of file
diff --git a/proxy_server.py b/proxy_server.py
new file mode 100644
index 0000000..63685b5
--- /dev/null
+++ b/proxy_server.py
@@ -0,0 +1,79 @@
+import os
+import uvicorn
+from fastapi import FastAPI, Request
+from fastapi.responses import StreamingResponse, JSONResponse
+from llama_cpp_runner.main import LlamaCpp
+
+app = FastAPI(title="LlamaCpp Proxy")
+
+# Initialize the LlamaCpp class
+models_dir = os.environ.get("MODELS_DIR", "/models")
+cache_dir = os.environ.get("CACHE_DIR", "/cache")
+verbose = os.environ.get("VERBOSE", "true").lower() == "true"
+timeout = int(os.environ.get("TIMEOUT_MINUTES", "30"))
+
+print(f"Models directory: {models_dir}")
+print(f"Cache directory: {cache_dir}")
+
+# Create the LlamaCpp instance
+llama_runner = LlamaCpp(
+    models_dir=models_dir,
+    cache_dir=cache_dir,
+    verbose=verbose,
+    timeout_minutes=timeout
+)
+
+@app.get("/")
+def read_root():
+    """Get server status and list of available models."""
+    return {"status": "running", "models": llama_runner.list_models()}
+
+@app.post("/v1/chat/completions")
+async def chat_completions(request: Request):
+    """Forward chat completion requests to the LlamaCpp server."""
+    try:
+        body = await request.json()
+
+        if "model" not in body:
+            return JSONResponse(
+                status_code=400,
+                content={"error": "Model not specified in request"}
+            )
+
+        try:
+            result = llama_runner.chat_completion(body)
+
+            # Handle streaming responses
+            if body.get("stream", False):
+                async def generate():
+                    for line in result:
+                        if line:
+                            yield f"data: {line}\n\n"
+                    yield "data: [DONE]\n\n"
+
+                return StreamingResponse(generate(), media_type="text/event-stream")
+            else:
+                return result
+        except Exception as e:
+            return JSONResponse(
+                status_code=500,
+                content={"error": str(e)}
+            )
+    except Exception as e:
+        return JSONResponse(
+            status_code=400,
+            content={"error": f"Invalid request: {str(e)}"}
+        )
+
+@app.get("/models")
+def list_models():
+    """List all available models."""
+    return {"models": llama_runner.list_models()}
+
+if __name__ == "__main__":
+    print("Starting LlamaCpp Proxy Server on port 3636")
+    models = llama_runner.list_models()
+    print(f"Available models: {models}")
+    if not models:
+        print("WARNING: No models found in the models directory.")
+    uvicorn.run(app, host="0.0.0.0", port=3636)
\ No newline at end of file
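
A quick smoke test for the proxy once the container is up, as a minimal sketch: it assumes at least one GGUF file has been dropped into ./models, the model filename below is a placeholder, and the non-streaming branch simply returns whatever llama_runner.chat_completion produces.

import requests

BASE_URL = "http://localhost:3636"

# The root endpoint reports server status and the model files discovered in /models.
print(requests.get(f"{BASE_URL}/", timeout=10).json())

# Non-streaming chat completion; "model" must name one of the files listed above.
resp = requests.post(
    f"{BASE_URL}/v1/chat/completions",
    json={
        "model": "example-model.gguf",  # placeholder, replace with a real filename
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "stream": False,
    },
    timeout=300,
)
print(resp.status_code)
print(resp.json())

For streaming, set "stream": true and read the text/event-stream response line by line; each event is prefixed with "data: " and the stream ends with "data: [DONE]".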