From 30fa228a8400eb8a5684ee5e4be43938e731739c Mon Sep 17 00:00:00 2001
From: "0xThresh.eth" <0xthresh@protonmail.com>
Date: Thu, 6 Jun 2024 22:54:12 -0700
Subject: [PATCH] Working DataDog pipeline
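
Adds a DataDog LLM Observability filter pipeline that opens a span for
each chat request and records the user message and assistant reply via
ddtrace. Also adds Dockerfile.rust, which bundles the Rust toolchain
needed to build ddtrace from source, and dev-docker.sh, which starts
Open WebUI against the Pipelines endpoint.

To try it out, set DD_API_KEY (and optionally DD_SITE and ML_APP) in
.env, then run `bash dev-docker.sh`. To test the Rust-enabled image,
build it with `docker build -f Dockerfile.rust -t pipelines:datadog .`
and substitute that tag for ghcr.io/open-webui/pipelines:latest in
dev-docker.sh; the tag name here is just an example.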
---
 .dockerignore                               |   3 +-
 .gitignore                                  |   3 +-
 Dockerfile.rust                             |  58 ++++++++++
 dev-docker.sh                               |   9 ++
 examples/filters/datadog_filter_pipeline.py | 131 ++++++++++++++++++++
 5 files changed, 202 insertions(+), 2 deletions(-)
 create mode 100644 Dockerfile.rust
 create mode 100755 dev-docker.sh
 create mode 100644 examples/filters/datadog_filter_pipeline.py

diff --git a/.dockerignore b/.dockerignore
index b694934..c2eabec 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1 +1,2 @@
-.venv
\ No newline at end of file
+.venv
+.env
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index d21938e..d454a74 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,5 @@
 pipelines/*
 !pipelines/.gitignore
 .DS_Store
-.venv
\ No newline at end of file
+.venv
+venv/
\ No newline at end of file
diff --git a/Dockerfile.rust b/Dockerfile.rust
new file mode 100644
index 0000000..1f80627
--- /dev/null
+++ b/Dockerfile.rust
@@ -0,0 +1,58 @@
+FROM python:3.11-slim-bookworm AS base
+
+# Build arguments
+ARG USE_CUDA
+ARG USE_CUDA_VER
+
+## Base settings ##
+ENV ENV=prod \
+    PORT=9099 \
+    # pass build args to the build
+    USE_CUDA_DOCKER=${USE_CUDA} \
+    USE_CUDA_DOCKER_VER=${USE_CUDA_VER}
+
+
+# Install GCC and build tools
+RUN apt-get update && \
+    apt-get install -y gcc build-essential curl git && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+
+WORKDIR /app
+
+# Install Python dependencies
+COPY ./requirements.txt .
+RUN pip3 install uv && \
+    if [ "$USE_CUDA" = "true" ]; then \
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
+    uv pip install --system -r requirements.txt --no-cache-dir; \
+    else \
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
+    uv pip install --system -r requirements.txt --no-cache-dir; \
+    fi
+
+# Copy the application code
+COPY . .
+
+# Install the Rust toolchain, which is required to build ddtrace from source
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Set up the Rust environment
+ENV PATH="/root/.cargo/bin:${PATH}"
+RUN /root/.cargo/bin/rustup default stable
+
+# Sanity check that Rust installed correctly
+RUN cargo --version
+
+# Set the working directory to the Pipelines app dir
+WORKDIR /app
+
+# Install ddtrace from source for the latest LLM Observability SDK
+RUN pip3 install git+https://github.com/DataDog/dd-trace-py.git@main
+
+# Bind the server to all interfaces on the Pipelines port
+ENV HOST="0.0.0.0"
+ENV PORT="9099"
+
+ENTRYPOINT [ "bash", "start.sh" ]
\ No newline at end of file
diff --git a/dev-docker.sh b/dev-docker.sh
new file mode 100755
index 0000000..a502b05
--- /dev/null
+++ b/dev-docker.sh
@@ -0,0 +1,9 @@
+# Removes any existing Open WebUI and Pipelines containers/volumes - uncomment if you need a fresh start
+# docker rm --force pipelines
+# docker rm --force open-webui
+# docker volume rm pipelines
+# docker volume rm open-webui
+
+# Runs the Pipelines container and the Ollama-bundled Open WebUI image, with Open WebUI pointed at the Pipelines endpoint
+docker run -d -p 9099:9099 --add-host=host.docker.internal:host-gateway -v pipelines:/app/pipelines --name pipelines --restart always --env-file .env ghcr.io/open-webui/pipelines:latest
+docker run -d -p 3000:8080 -v ~/.ollama:/root/.ollama -v open-webui:/app/backend/data --name open-webui --restart always -e OPENAI_API_BASE_URL=http://host.docker.internal:9099 -e OPENAI_API_KEY=0p3n-w3bu! ghcr.io/open-webui/open-webui:ollama
\ No newline at end of file
diff --git a/examples/filters/datadog_filter_pipeline.py b/examples/filters/datadog_filter_pipeline.py
new file mode 100644
index 0000000..a55ca9a
--- /dev/null
+++ b/examples/filters/datadog_filter_pipeline.py
@@ -0,0 +1,131 @@
+"""
+title: DataDog Filter Pipeline
+author: 0xThresh
+date: 2024-06-06
+version: 1.0
+license: MIT
+description: A filter pipeline that sends traces to DataDog.
+requirements: git+https://github.com/DataDog/dd-trace-py.git@main
+environment_variables: DD_LLMOBS_AGENTLESS_ENABLED, DD_LLMOBS_ENABLED, DD_LLMOBS_APP_NAME, DD_API_KEY, DD_SITE, ML_APP
+"""
+
+from typing import List, Optional
+import os
+
+from utils.pipelines.main import get_last_user_message, get_last_assistant_message
+from pydantic import BaseModel
+from ddtrace.llmobs import LLMObs
+
+
+class Pipeline:
+    class Valves(BaseModel):
+        # List target pipeline ids (models) that this filter will be connected to.
+        # If you want to connect this filter to all pipelines, you can set pipelines to ["*"]
+        # e.g. ["llama3:latest", "gpt-3.5-turbo"]
+        pipelines: List[str] = []
+
+        # Assign a priority level to the filter pipeline.
+        # The priority level determines the order in which the filter pipelines are executed.
+        # The lower the number, the higher the priority.
+        priority: int = 0
+
+        # Valves
+        dd_api_key: str
+        dd_site: str
+        ml_app: str
+
+    def __init__(self):
+        # Pipeline filters are only compatible with Open WebUI.
+        # A filter pipeline acts as middleware that can edit the form data before it is sent to the OpenAI API.
+        self.type = "filter"
+
+        # Optionally, you can set the id and name of the pipeline.
+        # Best practice is to leave the id unset so it is inferred from the filename; that way users can install multiple versions of the same pipeline.
+        # The identifier must be unique across all pipelines.
+        # The identifier must be an alphanumeric string that can include underscores or hyphens. It cannot contain spaces, special characters, slashes, or backslashes.
+        # self.id = "datadog_filter_pipeline"
+        self.name = "DataDog Filter"
+
+        # Initialize
+        self.valves = self.Valves(
+            **{
+                "pipelines": ["*"],  # Connect to all pipelines
+                "dd_api_key": os.getenv("DD_API_KEY"),
+                "dd_site": os.getenv("DD_SITE", "datadoghq.com"),
+                "ml_app": os.getenv("ML_APP", "pipelines-test"),
+            }
+        )
+
+        # DataDog LLM Observability SDK docs: https://docs.datadoghq.com/tracing/llm_observability/sdk/
+        self.LLMObs = LLMObs()
+        self.llm_span = None
+        self.chat_generations = {}
+        pass
+
+    async def on_startup(self):
+        # This function is called when the server is started.
+        print(f"on_startup:{__name__}")
+        self.set_dd()
+        pass
+
+    async def on_shutdown(self):
+        # This function is called when the server is stopped.
+        print(f"on_shutdown:{__name__}")
+        self.LLMObs.flush()
+        pass
+
+    async def on_valves_updated(self):
+        # This function is called when the valves are updated.
+        self.set_dd()
+        pass
+
+    def set_dd(self):
+        self.LLMObs.enable(
+            ml_app=self.valves.ml_app,
+            api_key=self.valves.dd_api_key,
+            site=self.valves.dd_site,
+            agentless_enabled=True,
+            integrations_enabled=True,
+        )
+
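+    # Span lifecycle: inlet() opens an LLM span for the incoming request and
+    # records the last user message; outlet() pops the span stored for that
+    # chat_id, records the assistant reply, then finishes and flushes the span.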
+    async def inlet(self, body: dict, user: Optional[dict] = None) -> dict:
+        print(f"inlet:{__name__}")
+
+        self.llm_span = self.LLMObs.llm(
+            model_name=body["model"],
+            name=f"filter:{__name__}",
+            model_provider="open-webui",
+            session_id=body["chat_id"],
+            ml_app=self.valves.ml_app,
+        )
+
+        self.LLMObs.annotate(
+            span=self.llm_span,
+            input_data=get_last_user_message(body["messages"]),
+        )
+
+        # Store the span per chat so the outlet can find and close it
+        self.chat_generations[body["chat_id"]] = self.llm_span
+
+        return body
+
+
+    async def outlet(self, body: dict, user: Optional[dict] = None) -> dict:
+        print(f"outlet:{__name__}")
+        if body["chat_id"] not in self.chat_generations:
+            return body
+
+        # Retrieve the span opened by the inlet for this chat
+        llm_span = self.chat_generations.pop(body["chat_id"])
+        self.LLMObs.annotate(
+            span=llm_span,
+            output_data=get_last_assistant_message(body["messages"]),
+        )
+
+        llm_span.finish()
+        self.LLMObs.flush()
+
+        return body
\ No newline at end of file
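-- 
Testing notes (below the signature marker, so git am drops them): one way to
smoke-test the filter outside Open WebUI is a short driver script. This is a
sketch, not part of the commit: it assumes DD_API_KEY, DD_SITE, and ML_APP
are exported, that it runs from the repo root so utils.pipelines.main
resolves, and the import path and message contents are illustrative.

    import asyncio

    from examples.filters.datadog_filter_pipeline import Pipeline

    async def main():
        pipeline = Pipeline()        # Valves read DD_API_KEY, DD_SITE, ML_APP
        await pipeline.on_startup()  # enables LLM Observability

        body = {
            "model": "llama3:latest",
            "chat_id": "smoke-test-1",
            "messages": [{"role": "user", "content": "Hello!"}],
        }
        body = await pipeline.inlet(body)   # opens the span, records input
        body["messages"].append({"role": "assistant", "content": "Hi there!"})
        body = await pipeline.outlet(body)  # records output, finishes, flushes

        await pipeline.on_shutdown()

    asyncio.run(main())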