open-webui/backend/apps/litellm/main.py

from fastapi import FastAPI, Depends, HTTPException
from fastapi.routing import APIRoute
from fastapi.middleware.cors import CORSMiddleware

import logging
from fastapi import FastAPI, Request, Depends, status, Response
from fastapi.responses import JSONResponse

from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
from starlette.responses import StreamingResponse
import json
import requests

from utils.utils import get_verified_user, get_current_user
from config import SRC_LOG_LEVELS, ENV
from constants import ERROR_MESSAGES

log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["LITELLM"])


from config import (
    MODEL_FILTER_ENABLED,
    MODEL_FILTER_LIST,
)


import asyncio
import subprocess


# FastAPI application object for the LiteLLM routes defined in this module.
app = FastAPI()

# Every origin is accepted by the CORS middleware configured below.
origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=origins,
)


# Handle of the LiteLLM subprocess; stays None until one has been spawned.
background_process = None


async def _relay_stream(stream, prefix=""):
    """Log every non-blank line read from a subprocess pipe until EOF."""
    async for raw_line in stream:
        # errors="replace" keeps undecodable bytes from aborting the relay.
        text = raw_line.decode(errors="replace").strip()
        if text:
            log.info(f"{prefix}{text}")


async def run_background_process(command):
    """Spawn ``command`` as a subprocess and relay its output until it exits.

    The command string is split on whitespace and executed without a shell.
    The process handle is stored in the module-level ``background_process``
    so it can be terminated later.

    Args:
        command: Whitespace-separated program and arguments.

    Raises:
        Exception: Any error raised while spawning or reading from the
            subprocess is logged and re-raised.
    """
    global background_process
    log.info("run_background_process")

    try:
        log.info(f"Executing command: {command}")
        process = await asyncio.create_subprocess_exec(
            *command.split(),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        background_process = process
        log.info("Subprocess started successfully.")

        # Drain both pipes concurrently. Reading stderr to EOF before touching
        # stdout would stall the child as soon as its stdout pipe filled up,
        # and nothing would be relayed until the process had already exited.
        await asyncio.gather(
            _relay_stream(process.stdout),
            _relay_stream(process.stderr, "Subprocess STDERR: "),
        )

        returncode = await process.wait()
        log.info(f"Subprocess exited with return code {returncode}")
    except Exception as e:
        log.error(f"Failed to start subprocess: {e}")
        raise


async def start_litellm_background():
    """Run the LiteLLM CLI via ``run_background_process`` and await it.

    The CLI is started with telemetry disabled and the config file at
    ``./data/litellm/config.yaml``.
    """
    print("start_litellm_background")
    await run_background_process(
        "litellm --telemetry False --config ./data/litellm/config.yaml"
    )


async def shutdown_litellm_background():
    """Terminate the tracked subprocess, if any, and wait for it to exit.

    Safe to call when no subprocess was started, when it has already exited,
    and more than once: the module-level ``background_process`` handle is
    cleared after the process has been reaped.
    """
    global background_process
    log.info("shutdown_litellm_background")

    process = background_process
    if process is None:
        return

    # Only signal a process that is still running; asyncio raises
    # ProcessLookupError when asked to terminate one that has already exited.
    if process.returncode is None:
        try:
            process.terminate()
        except ProcessLookupError:
            # The process exited between the check above and the signal.
            pass

    await process.wait()  # Ensure the process has terminated
    log.info("Subprocess terminated")
    background_process = None


@app.on_event("startup")
async def startup_event():
    """Schedule the LiteLLM subprocess as a background task on app startup."""
    log.info("startup_event")
    # TODO: Check config.yaml file and create one
    # Keep a strong reference to the task: the event loop only holds weak
    # references, so an unreferenced task can be garbage-collected mid-run.
    app.state.litellm_task = asyncio.create_task(start_litellm_background())


# Copy the imported model-filter settings onto app.state, where the /models
# handler reads them on each request.
app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST
app.state.MODEL_FILTER_ENABLED = MODEL_FILTER_ENABLED


@app.get("/")
async def get_status():
    """Return a constant ``{"status": True}`` payload."""
    payload = {"status": True}
    return payload


@app.get("/models")
@app.get("/v1/models")
async def get_models(user=Depends(get_current_user)):
    """Return the model list from the local LiteLLM server.

    When model filtering is enabled and the caller has the ``"user"`` role,
    only models whose ``id`` is in ``app.state.MODEL_FILTER_LIST`` are kept.

    Raises:
        HTTPException: With the upstream status code when LiteLLM answered
            with an error status, otherwise 500 (connection failure or an
            unusable response body).
    """
    url = "http://localhost:4000/v1"
    r = None
    try:
        # NOTE(review): requests is synchronous, so this call blocks the
        # event loop for the duration of the upstream request.
        r = requests.request(method="GET", url=f"{url}/models")
        r.raise_for_status()

        data = r.json()

        if app.state.MODEL_FILTER_ENABLED and user and user.role == "user":
            allowed = app.state.MODEL_FILTER_LIST
            data["data"] = [
                model for model in data["data"] if model["id"] in allowed
            ]

        return data
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    error_detail = f"External: {res['error']}"
            except Exception:
                error_detail = f"External: {e}"

        # Do not rely on the truthiness of `r`: a requests.Response is falsy
        # for every 4xx/5xx status, which would turn each upstream error into
        # a 500 and a failure after a 2xx response into HTTPException(200).
        status_code = 500
        if r is not None and r.status_code >= 400:
            status_code = r.status_code

        raise HTTPException(
            status_code=status_code,
            detail=error_detail,
        )


@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(path: str, request: Request, user=Depends(get_verified_user)):
    """Forward a request to the local LiteLLM server and relay its response.

    The request body is sent unchanged with a JSON content type. Server-sent
    event responses are streamed back in chunks; any other response is parsed
    as JSON and returned.

    Args:
        path: Path below the LiteLLM ``/v1`` prefix to forward to.
        request: Incoming request; its method and raw body are forwarded.
        user: Verified user resolved by the dependency (access control only).

    Raises:
        HTTPException: With the upstream status code when LiteLLM answered
            with an error status, otherwise 500 (connection failure or an
            unusable response body).
    """
    body = await request.body()

    url = "http://localhost:4000/v1"

    target_url = f"{url}/{path}"

    headers = {}
    # headers["Authorization"] = f"Bearer {key}"
    headers["Content-Type"] = "application/json"

    r = None

    try:
        # NOTE(review): requests is synchronous, so this call blocks the
        # event loop until the upstream response headers arrive.
        r = requests.request(
            method=request.method,
            url=target_url,
            data=body,
            headers=headers,
            stream=True,
        )

        r.raise_for_status()

        # Check if response is SSE
        if "text/event-stream" in r.headers.get("Content-Type", ""):
            # NOTE(review): upstream headers are forwarded verbatim; confirm
            # that length/encoding headers are still correct for the relayed
            # chunks.
            return StreamingResponse(
                r.iter_content(chunk_size=8192),
                status_code=r.status_code,
                headers=dict(r.headers),
            )
        else:
            response_data = r.json()
            return response_data
    except Exception as e:
        log.exception(e)
        error_detail = "Open WebUI: Server Connection Error"
        if r is not None:
            try:
                res = r.json()
                if "error" in res:
                    err = res["error"]
                    # Only index into the error when it is a mapping; a plain
                    # string would otherwise get a substring test and then
                    # fail on the subscript.
                    if isinstance(err, dict) and "message" in err:
                        err = err["message"]
                    error_detail = f"External: {err}"
            except Exception:
                error_detail = f"External: {e}"

        # Do not rely on the truthiness of `r`: a requests.Response is falsy
        # for every 4xx/5xx status, which would turn each upstream error into
        # a 500 and a failure after a 2xx response into HTTPException(200).
        status_code = 500
        if r is not None and r.status_code >= 400:
            status_code = r.status_code

        raise HTTPException(status_code=status_code, detail=error_detail)


# class ModifyModelsResponseMiddleware(BaseHTTPMiddleware):
#     async def dispatch(
#         self, request: Request, call_next: RequestResponseEndpoint
#     ) -> Response:

#         response = await call_next(request)
#         user = request.state.user

#         if "/models" in request.url.path:
#             if isinstance(response, StreamingResponse):
#                 # Read the content of the streaming response
#                 body = b""
#                 async for chunk in response.body_iterator:
#                     body += chunk

#                 data = json.loads(body.decode("utf-8"))

#                 if app.state.MODEL_FILTER_ENABLED:
#                     if user and user.role == "user":
#                         data["data"] = list(
#                             filter(
#                                 lambda model: model["id"]
#                                 in app.state.MODEL_FILTER_LIST,
#                                 data["data"],
#                             )
#                         )

#                 # Modified Flag
#                 data["modified"] = True
#                 return JSONResponse(content=data)

#         return response


# app.add_middleware(ModifyModelsResponseMiddleware)
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`from fastapi import FastAPI, Depends, HTTPException`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00			`from fastapi.routing import APIRoute`
			`from fastapi.middleware.cors import CORSMiddleware`
refac: litellm 2024-03-08 21:33:56 +00:00
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00			`import logging`
feat: litellm model filter support 2024-03-21 02:28:33 +00:00			`from fastapi import FastAPI, Request, Depends, status, Response`
refac: litellm 2024-03-08 21:33:56 +00:00			`from fastapi.responses import JSONResponse`
feat: litellm model filter support 2024-03-21 02:28:33 +00:00
			`from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint`
			`from starlette.responses import StreamingResponse`
			`import json`
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`import requests`
feat: litellm model filter support 2024-03-21 02:28:33 +00:00
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`from utils.utils import get_verified_user, get_current_user`
Migrate to python logging module with env var control. 2024-03-20 23:11:36 +00:00			`from config import SRC_LOG_LEVELS, ENV`
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`from constants import ERROR_MESSAGES`
Migrate to python logging module with env var control. 2024-03-20 23:11:36 +00:00
			`log = logging.getLogger(__name__)`
			`log.setLevel(SRC_LOG_LEVELS["LITELLM"])`
refac: litellm 2024-03-08 21:33:56 +00:00
feat: litellm model filter support 2024-03-21 02:28:33 +00:00
			`from config import (`
			`MODEL_FILTER_ENABLED,`
			`MODEL_FILTER_LIST,`
			`)`


fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00			`import asyncio`
			`import subprocess`
refac: litellm 2024-03-08 21:33:56 +00:00

fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00			`app = FastAPI()`
refac: litellm 2024-03-08 21:33:56 +00:00
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00			`origins = ["*"]`
refac: litellm 2024-03-08 21:33:56 +00:00
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00			`app.add_middleware(`
			`CORSMiddleware,`
			`allow_origins=origins,`
			`allow_credentials=True,`
			`allow_methods=["*"],`
			`allow_headers=["*"],`
			`)`
refac: litellm 2024-03-08 21:33:56 +00:00
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00
refac: close subprocess gracefully 2024-04-21 06:46:09 +00:00			`# Global variable to store the subprocess reference`
			`background_process = None`

pwned :) 2024-04-21 06:22:02 +00:00
refac: close subprocess gracefully 2024-04-21 06:46:09 +00:00			`async def run_background_process(command):`
			`global background_process`
			`print("run_background_process")`

			`try:`
			`# Log the command to be executed`
			`print(f"Executing command: {command}")`
			`# Execute the command and create a subprocess`
			`process = await asyncio.create_subprocess_exec(`
			`*command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE`
			`)`
			`background_process = process`
			`print("Subprocess started successfully.")`

			`# Capture STDERR for debugging purposes`
			`stderr_output = await process.stderr.read()`
			`stderr_text = stderr_output.decode().strip()`
			`if stderr_text:`
			`print(f"Subprocess STDERR: {stderr_text}")`

			`# Print output line by line`
			`async for line in process.stdout:`
			`print(line.decode().strip())`

			`# Wait for the process to finish`
			`returncode = await process.wait()`
			`print(f"Subprocess exited with return code {returncode}")`
			`except Exception as e:`
			`log.error(f"Failed to start subprocess: {e}")`
			`raise # Optionally re-raise the exception if you want it to propagate`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00

			`async def start_litellm_background():`
pwned :) 2024-04-21 06:22:02 +00:00			`print("start_litellm_background")`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00			`# Command to run in the background`
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`command = "litellm --telemetry False --config ./data/litellm/config.yaml"`
pwned :) 2024-04-21 06:22:02 +00:00
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00			`await run_background_process(command)`
refac: litellm 2024-03-08 21:33:56 +00:00

refac: close subprocess gracefully 2024-04-21 06:46:09 +00:00			`async def shutdown_litellm_background():`
			`print("shutdown_litellm_background")`
			`global background_process`
			`if background_process:`
			`background_process.terminate()`
			`await background_process.wait() # Ensure the process has terminated`
			`print("Subprocess terminated")`


refac: litellm 2024-03-08 21:33:56 +00:00			`@app.on_event("startup")`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00			`async def startup_event():`
pwned :) 2024-04-21 06:22:02 +00:00
			`print("startup_event")`
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`# TODO: Check config.yaml file and create one`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00			`asyncio.create_task(start_litellm_background())`
refac: litellm 2024-03-08 21:33:56 +00:00

feat: litellm model filter support 2024-03-21 02:28:33 +00:00			`app.state.MODEL_FILTER_ENABLED = MODEL_FILTER_ENABLED`
			`app.state.MODEL_FILTER_LIST = MODEL_FILTER_LIST`


fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00			`@app.get("/")`
			`async def get_status():`
			`return {"status": True}`


DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`@app.get("/models")`
			`@app.get("/v1/models")`
			`async def get_models(user=Depends(get_current_user)):`
			`url = "http://localhost:4000/v1"`
			`r = None`
			`try:`
			`r = requests.request(method="GET", url=f"{url}/models")`
			`r.raise_for_status()`
feat: litellm model filter support 2024-03-21 02:28:33 +00:00
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`data = r.json()`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`if app.state.MODEL_FILTER_ENABLED:`
			`if user and user.role == "user":`
			`data["data"] = list(`
			`filter(`
			`lambda model: model["id"] in app.state.MODEL_FILTER_LIST,`
			`data["data"],`
			`)`
			`)`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`return data`
			`except Exception as e:`
			`log.exception(e)`
			`error_detail = "Open WebUI: Server Connection Error"`
			`if r is not None:`
			`try:`
			`res = r.json()`
			`if "error" in res:`
			`error_detail = f"External: {res['error']}"`
			`except:`
			`error_detail = f"External: {e}"`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`raise HTTPException(`
			`status_code=r.status_code if r else 500,`
			`detail=error_detail,`
			`)`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00

DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])`
			`async def proxy(path: str, request: Request, user=Depends(get_verified_user)):`
			`body = await request.body()`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`url = "http://localhost:4000/v1"`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`target_url = f"{url}/{path}"`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`headers = {}`
			`# headers["Authorization"] = f"Bearer {key}"`
			`headers["Content-Type"] = "application/json"`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`r = None`
fix: run litellm as subprocess 2024-04-21 05:52:27 +00:00
DO NOT TRACK ME >:( 2024-04-21 06:13:24 +00:00			`try:`
			`r = requests.request(`
			`method=request.method,`
			`url=target_url,`
			`data=body,`
			`headers=headers,`
			`stream=True,`
			`)`

			`r.raise_for_status()`

			`# Check if response is SSE`
			`if "text/event-stream" in r.headers.get("Content-Type", ""):`
			`return StreamingResponse(`
			`r.iter_content(chunk_size=8192),`
			`status_code=r.status_code,`
			`headers=dict(r.headers),`
			`)`
			`else:`
			`response_data = r.json()`
			`return response_data`
			`except Exception as e:`
			`log.exception(e)`
			`error_detail = "Open WebUI: Server Connection Error"`
			`if r is not None:`
			`try:`
			`res = r.json()`
			`if "error" in res:`
			`error_detail = f"External: {res['error']['message'] if 'message' in res['error'] else res['error']}"`
			`except:`
			`error_detail = f"External: {e}"`

			`raise HTTPException(`
			`status_code=r.status_code if r else 500, detail=error_detail`
			`)`


			`# class ModifyModelsResponseMiddleware(BaseHTTPMiddleware):`
			`# async def dispatch(`
			`# self, request: Request, call_next: RequestResponseEndpoint`
			`# ) -> Response:`

			`# response = await call_next(request)`
			`# user = request.state.user`

			`# if "/models" in request.url.path:`
			`# if isinstance(response, StreamingResponse):`
			`# # Read the content of the streaming response`
			`# body = b""`
			`# async for chunk in response.body_iterator:`
			`# body += chunk`

			`# data = json.loads(body.decode("utf-8"))`

			`# if app.state.MODEL_FILTER_ENABLED:`
			`# if user and user.role == "user":`
			`# data["data"] = list(`
			`# filter(`
			`# lambda model: model["id"]`
			`# in app.state.MODEL_FILTER_LIST,`
			`# data["data"],`
			`# )`
			`# )`

			`# # Modified Flag`
			`# data["modified"] = True`
			`# return JSONResponse(content=data)`

			`# return response`


			`# app.add_middleware(ModifyModelsResponseMiddleware)`