From 30fa228a8400eb8a5684ee5e4be43938e731739c Mon Sep 17 00:00:00 2001
From: "0xThresh.eth" <0xthresh@protonmail.com>
Date: Thu, 6 Jun 2024 22:54:12 -0700
Subject: [PATCH 1/4] Working DataDog pipeline

---
 .dockerignore                               |   3 +-
 .gitignore                                  |   3 +-
 Dockerfile.rust                             |  58 +++++++++
 dev-docker.sh                               |   9 ++
 examples/filters/datadog_filter_pipeline.py | 128 ++++++++++++++++++++
 5 files changed, 199 insertions(+), 2 deletions(-)
 create mode 100644 Dockerfile.rust
 create mode 100755 dev-docker.sh
 create mode 100644 examples/filters/datadog_filter_pipeline.py

diff --git a/.dockerignore b/.dockerignore
index b694934..c2eabec 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1 +1,2 @@
-.venv
\ No newline at end of file
+.venv
+.env
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index d21938e..d454a74 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,5 @@ pipelines/*
 !pipelines/.gitignore
 .DS_Store
 
-.venv
\ No newline at end of file
+.venv
+venv/
\ No newline at end of file
diff --git a/Dockerfile.rust b/Dockerfile.rust
new file mode 100644
index 0000000..1f80627
--- /dev/null
+++ b/Dockerfile.rust
@@ -0,0 +1,58 @@
+FROM python:3.11-slim-bookworm as base
+
+# Use args
+ARG USE_CUDA
+ARG USE_CUDA_VER
+
+## Basis ##
+ENV ENV=prod \
+    PORT=9099 \
+    # surface the build args as environment variables
+    USE_CUDA_DOCKER=${USE_CUDA} \
+    USE_CUDA_DOCKER_VER=${USE_CUDA_VER}
+
+
+# Install GCC and build tools
+RUN apt-get update && \
+    apt-get install -y gcc build-essential curl git && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+
+WORKDIR /app
+
+# Install Python dependencies
+COPY ./requirements.txt .
+RUN pip3 install uv && \
+    if [ "$USE_CUDA" = "true" ]; then \
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
+    uv pip install --system -r requirements.txt --no-cache-dir; \
+    else \
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
+    uv pip install --system -r requirements.txt --no-cache-dir; \
+    fi
+
+# Copy the application code
+COPY . .
+
+# Install the Rust toolchain, required to build ddtrace for the DataDog components
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Set up the Rust environment
+ENV PATH="/root/.cargo/bin:${PATH}"
+RUN /root/.cargo/bin/rustup default stable
+
+# DEBUG - check that Rust installed correctly
+RUN cargo --version
+
+# Set the working directory to the Pipelines app dir
+WORKDIR /app
+
+# Install ddtrace from source (requires the Rust toolchain set up above)
+RUN pip3 install git+https://github.com/DataDog/dd-trace-py.git@main
+
+# Set the host and port the server listens on
+ENV HOST="0.0.0.0"
+ENV PORT="9099"
+
+ENTRYPOINT [ "bash", "start.sh" ]
\ No newline at end of file
diff --git a/dev-docker.sh b/dev-docker.sh
new file mode 100755
index 0000000..a502b05
--- /dev/null
+++ b/dev-docker.sh
@@ -0,0 +1,9 @@
+# Removes any existing Open WebUI and Pipelines containers/volumes - uncomment if you need a fresh start
+# docker rm --force pipelines
+# docker rm --force open-webui 
+# docker volume rm pipelines
+# docker volume rm open-webui 
+
+# Runs the containers with Ollama image for Open WebUI and the Pipelines endpoint in place
+docker run -d -p 9099:9099 --add-host=host.docker.internal:host-gateway -v pipelines:/app/pipelines --name pipelines --restart always --env-file .env ghcr.io/open-webui/pipelines:latest
+docker run -d -p 3000:8080 -v ~/.ollama:/root/.ollama -v open-webui:/app/backend/data --name open-webui --restart always -e OPENAI_API_BASE_URL=http://host.docker.internal:9099 -e OPENAI_API_KEY=0p3n-w3bu! ghcr.io/open-webui/open-webui:ollama
\ No newline at end of file
diff --git a/examples/filters/datadog_filter_pipeline.py b/examples/filters/datadog_filter_pipeline.py
new file mode 100644
index 0000000..a55ca9a
--- /dev/null
+++ b/examples/filters/datadog_filter_pipeline.py
@@ -0,0 +1,128 @@
+"""
+title: DataDog Filter Pipeline
+author: 0xThresh
+date: 2024-06-06
+version: 1.0
+license: MIT
+description: A filter pipeline that sends traces to DataDog.
+requirements: git+https://github.com/DataDog/dd-trace-py.git@main
+environment_variables: DD_LLMOBS_AGENTLESS_ENABLED, DD_LLMOBS_ENABLED, DD_LLMOBS_APP_NAME, DD_API_KEY, DD_SITE 
+"""
+
+from typing import List, Optional
+import os
+
+from utils.pipelines.main import get_last_user_message, get_last_assistant_message
+from pydantic import BaseModel
+from ddtrace.llmobs import LLMObs
+
+
+class Pipeline:
+    class Valves(BaseModel):
+        # List target pipeline ids (models) that this filter will be connected to.
+        # If you want to connect this filter to all pipelines, you can set pipelines to ["*"]
+        # e.g. ["llama3:latest", "gpt-3.5-turbo"]
+        pipelines: List[str] = []
+
+        # Assign a priority level to the filter pipeline.
+        # The priority level determines the order in which the filter pipelines are executed.
+        # The lower the number, the higher the priority.
+        priority: int = 0
+
+        # Valves
+        dd_api_key: str
+        dd_site: str
+        ml_app: str
+
+    def __init__(self):
+        # Pipeline filters are only compatible with Open WebUI
+        # You can think of a filter pipeline as middleware used to edit the form data before it is sent to the OpenAI API.
+        self.type = "filter"
+
+        # Optionally, you can set the id and name of the pipeline.
+        # Best practice is to leave the id unset so that it is automatically inferred from the filename; this lets users install multiple versions of the same pipeline.
+        # The identifier must be unique across all pipelines.
+        # The identifier must be an alphanumeric string that can include underscores or hyphens. It cannot contain spaces, special characters, slashes, or backslashes.
+        # self.id = "datadog_filter_pipeline"
+        self.name = "DataDog Filter"
+
+        # Initialize
+        self.valves = self.Valves(
+            **{
+                "pipelines": ["*"],  # Connect to all pipelines
+                "dd_api_key": os.getenv("DD_API_KEY"),
+                "dd_site": os.getenv("DD_SITE", "datadoghq.com"),
+                "ml_app": os.getenv("ML_APP", "pipelines-test"),
+            }
+        )
+
+        # DataDog LLMOBS docs: https://docs.datadoghq.com/tracing/llm_observability/sdk/
+        self.LLMObs = LLMObs()
+        self.llm_span = None
+        self.chat_generations = {}
+        pass
+
+    async def on_startup(self):
+        # This function is called when the server is started.
+        print(f"on_startup:{__name__}")
+        self.set_dd()
+        pass
+
+    async def on_shutdown(self):
+        # This function is called when the server is stopped.
+        print(f"on_shutdown:{__name__}")
+        self.LLMObs.flush()
+        pass
+
+    async def on_valves_updated(self):
+        # This function is called when the valves are updated.
+        self.set_dd()
+        pass
+
+    def set_dd(self):
+        self.LLMObs.enable(
+            ml_app=self.valves.ml_app,
+            api_key=self.valves.dd_api_key,
+            site=self.valves.dd_site,
+            agentless_enabled=True,
+            integrations_enabled=True,
+        )
+
+    async def inlet(self, body: dict, user: Optional[dict] = None) -> dict:
+        print(f"inlet:{__name__}")
+
+        self.llm_span = self.LLMObs.llm(
+            model_name=body["model"],
+            name=f"filter:{__name__}",
+            model_provider="open-webui",
+            session_id=body["chat_id"],
+            ml_app=self.valves.ml_app
+        )
+
+        self.LLMObs.annotate(
+            span=self.llm_span,
+            input_data=get_last_user_message(body["messages"]),
+        )
+
+        print("SPAN: ")
+        print(self.llm_span)
+
+        return body
+
+
+    async def outlet(self, body: dict, user: Optional[dict] = None) -> dict:
+        print(f"outlet:{__name__}")
+        if body["chat_id"] not in self.chat_generations:
+            return body
+        print("SELF LLM SPAN")
+        print(self.llm_span)
+        #self.set_dd()
+        self.LLMObs.annotate(
+            span=self.llm_span,
+            output_data=get_last_assistant_message(body["messages"]),
+        )
+
+        self.llm_span.finish()
+        self.LLMObs.flush()
+
+        return body
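
The filter above splits a single LLMObs span across the two halves of a request: inlet() opens the span and records the user prompt, and outlet() records the assistant reply, finishes the span, and flushes it to DataDog. A minimal standalone sketch of that lifecycle, using the same ddtrace LLMObs calls the filter makes (the app name, model, session id, and message text are placeholders):

    import os

    from ddtrace.llmobs import LLMObs

    # Same setup the filter performs in set_dd(); assumes DD_API_KEY is exported.
    LLMObs.enable(
        ml_app="pipelines-test",
        api_key=os.environ["DD_API_KEY"],
        site="datadoghq.com",
        agentless_enabled=True,
        integrations_enabled=True,
    )

    # inlet(): open an LLM span and annotate the user prompt.
    span = LLMObs.llm(
        model_name="llama3:latest",
        name="filter:example",
        model_provider="open-webui",
        session_id="chat-123",
    )
    LLMObs.annotate(span=span, input_data="What is observability?")

    # outlet(): annotate the reply, close the span, and ship the trace.
    LLMObs.annotate(span=span, output_data="Observability is ...")
    span.finish()
    LLMObs.flush()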

From cdb2367493b68dc760e3b5ccba430ae02f1e123a Mon Sep 17 00:00:00 2001
From: "0xThresh.eth" <0xthresh@protonmail.com>
Date: Thu, 13 Jun 2024 15:30:36 -0700
Subject: [PATCH 2/4] Add Rust to Docker image, update requirements.txt

---
 Dockerfile       |  6 +++++
 Dockerfile.rust  | 58 ------------------------------------------------
 requirements.txt |  3 ++-
 3 files changed, 8 insertions(+), 59 deletions(-)
 delete mode 100644 Dockerfile.rust

diff --git a/Dockerfile b/Dockerfile
index b6d8c7f..53090a8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,6 +18,12 @@ RUN apt-get update && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
+# Install Rust 
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+
+# Set up the Rust environment
+ENV PATH="/root/.cargo/bin:${PATH}"
+RUN rustup default stable
 
 WORKDIR /app
 
diff --git a/Dockerfile.rust b/Dockerfile.rust
deleted file mode 100644
index 1f80627..0000000
--- a/Dockerfile.rust
+++ /dev/null
@@ -1,58 +0,0 @@
-FROM python:3.11-slim-bookworm as base
-
-# Use args
-ARG USE_CUDA
-ARG USE_CUDA_VER
-
-## Basis ##
-ENV ENV=prod \
-    PORT=9099 \
-    # surface the build args as environment variables
-    USE_CUDA_DOCKER=${USE_CUDA} \
-    USE_CUDA_DOCKER_VER=${USE_CUDA_VER}
-
-
-# Install GCC and build tools
-RUN apt-get update && \
-    apt-get install -y gcc build-essential curl git && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
-
-WORKDIR /app
-
-# Install Python dependencies
-COPY ./requirements.txt .
-RUN pip3 install uv && \
-    if [ "$USE_CUDA" = "true" ]; then \
-    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
-    uv pip install --system -r requirements.txt --no-cache-dir; \
-    else \
-    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
-    uv pip install --system -r requirements.txt --no-cache-dir; \
-    fi
-
-# Copy the application code
-COPY . .
-
-# Install the Rust toolchain, required to build ddtrace for the DataDog components
-RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
-
-# Set up the Rust environment
-ENV PATH="/root/.cargo/bin:${PATH}"
-RUN /root/.cargo/bin/rustup default stable
-
-# DEBUG - check that Rust installed correctly
-RUN cargo --version
-
-# Set the working directory to the Pipelines app dir
-WORKDIR /app
-
-# Install ddtrace from source (requires the Rust toolchain set up above)
-RUN pip3 install git+https://github.com/DataDog/dd-trace-py.git@main
-
-# Set the host and port the server listens on
-ENV HOST="0.0.0.0"
-ENV PORT="9099"
-
-ENTRYPOINT [ "bash", "start.sh" ]
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 5f27030..f713bc8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,8 +27,9 @@ redis
 sqlmodel
 chromadb
 
-# Deployment
+# Observability
 langfuse
+git+https://github.com/DataDog/dd-trace-py.git@main
 
 # ML libraries
 torch
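
With ddtrace built from source during the image build, a quick smoke test inside the container verifies that the wheel compiled and that the LLM Observability entry point imports (assuming ddtrace exposes its version string in the usual way):

    # Run inside the built image to confirm the source build of ddtrace.
    import ddtrace
    from ddtrace.llmobs import LLMObs

    print("ddtrace version:", ddtrace.__version__)
    print("LLMObs import OK:", LLMObs.__name__)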

From b983e8f2578cc6335e675ed98b710de4efab9d11 Mon Sep 17 00:00:00 2001
From: "0xThresh.eth" <0xthresh@protonmail.com>
Date: Sun, 16 Jun 2024 11:01:41 -0700
Subject: [PATCH 3/4] Testing updated Dockerfile

---
 Dockerfile                                  | 14 +++-----------
 dev-docker.sh                               |  2 +-
 examples/filters/datadog_filter_pipeline.py |  9 ++-------
 requirements.txt                            |  3 ++-
 4 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 53090a8..ac5864d 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -18,25 +18,17 @@ RUN apt-get update && \
     apt-get clean && \
     rm -rf /var/lib/apt/lists/*
 
-# Install Rust 
-RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
-
-# Set up the Rust environment
-ENV PATH="/root/.cargo/bin:${PATH}"
-RUN rustup default stable
-
 WORKDIR /app
 
 # Install Python dependencies
 COPY ./requirements.txt .
 RUN pip3 install uv && \
     if [ "$USE_CUDA" = "true" ]; then \
-    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \
-    uv pip install --system -r requirements.txt --no-cache-dir; \
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir; \
     else \
-    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \
-    uv pip install --system -r requirements.txt --no-cache-dir; \
+    pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir; \
     fi
+RUN uv pip install --system -r requirements.txt --no-cache-dir
 
 # Copy the application code
 COPY . .
diff --git a/dev-docker.sh b/dev-docker.sh
index a502b05..9ba69cd 100755
--- a/dev-docker.sh
+++ b/dev-docker.sh
@@ -5,5 +5,5 @@
 # docker volume rm open-webui 
 
 # Runs the containers with Ollama image for Open WebUI and the Pipelines endpoint in place
-docker run -d -p 9099:9099 --add-host=host.docker.internal:host-gateway -v pipelines:/app/pipelines --name pipelines --restart always --env-file .env ghcr.io/open-webui/pipelines:latest
+docker run -d -p 9099:9099 --add-host=host.docker.internal:host-gateway -v pipelines:/app/pipelines --name pipelines --restart always --env-file .env pipelines #ghcr.io/open-webui/pipelines:latest
 docker run -d -p 3000:8080 -v ~/.ollama:/root/.ollama -v open-webui:/app/backend/data --name open-webui --restart always -e OPENAI_API_BASE_URL=http://host.docker.internal:9099 -e OPENAI_API_KEY=0p3n-w3bu! ghcr.io/open-webui/open-webui:ollama
\ No newline at end of file
diff --git a/examples/filters/datadog_filter_pipeline.py b/examples/filters/datadog_filter_pipeline.py
index a55ca9a..6829b6c 100644
--- a/examples/filters/datadog_filter_pipeline.py
+++ b/examples/filters/datadog_filter_pipeline.py
@@ -5,7 +5,7 @@ date: 2024-06-06
 version: 1.0
 license: MIT
 description: A filter pipeline that sends traces to DataDog.
-requirements: git+https://github.com/DataDog/dd-trace-py.git@main
+requirements: ddtrace
 environment_variables: DD_LLMOBS_AGENTLESS_ENABLED, DD_LLMOBS_ENABLED, DD_LLMOBS_APP_NAME, DD_API_KEY, DD_SITE 
 """
 
@@ -104,9 +104,6 @@ class Pipeline:
             input_data=get_last_user_message(body["messages"]),
         )
 
-        print("SPAN: ")
-        print(self.llm_span)
-
         return body
 
 
@@ -114,9 +111,7 @@ class Pipeline:
         print(f"outlet:{__name__}")
         if body["chat_id"] not in self.chat_generations:
             return body
-        print("SELF LLM SPAN")
-        print(self.llm_span)
-        #self.set_dd()
+
         self.LLMObs.annotate(
             span=self.llm_span,
             output_data=get_last_assistant_message(body["messages"]),
diff --git a/requirements.txt b/requirements.txt
index f713bc8..70a9145 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -29,7 +29,8 @@ chromadb
 
 # Observability
 langfuse
-git+https://github.com/DataDog/dd-trace-py.git@main
+#git+https://github.com/DataDog/dd-trace-py.git@main
+ddtrace
 
 # ML libraries
 torch
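
Two loose ends remain after this patch: dev-docker.sh temporarily points at a locally built pipelines image for testing, and the guard at the top of outlet() still short-circuits every call. chat_generations is initialized to an empty dict and never written to, so the membership test is always true, outlet() returns before annotating, and the span opened in inlet() is never finished. A minimal reproduction of that logic, with hypothetical values:

    # chat_generations starts empty and nothing ever adds to it, so the
    # guard always fires and the output annotation is never reached.
    chat_generations = {}
    body = {"chat_id": "chat-123", "messages": []}  # hypothetical request body

    if body["chat_id"] not in chat_generations:
        print("early return: output never annotated, span never finished")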

From 6d508be4961054c30f25b3eac4c07a83091e31ba Mon Sep 17 00:00:00 2001
From: "0xThresh.eth" <0xthresh@protonmail.com>
Date: Sun, 16 Jun 2024 11:18:13 -0700
Subject: [PATCH 4/4] Resolved outlet issue

---
 dev-docker.sh                               | 2 +-
 examples/filters/datadog_filter_pipeline.py | 2 --
 requirements.txt                            | 1 -
 3 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/dev-docker.sh b/dev-docker.sh
index 9ba69cd..a502b05 100755
--- a/dev-docker.sh
+++ b/dev-docker.sh
@@ -5,5 +5,5 @@
 # docker volume rm open-webui 
 
 # Runs the containers with Ollama image for Open WebUI and the Pipelines endpoint in place
-docker run -d -p 9099:9099 --add-host=host.docker.internal:host-gateway -v pipelines:/app/pipelines --name pipelines --restart always --env-file .env pipelines #ghcr.io/open-webui/pipelines:latest
+docker run -d -p 9099:9099 --add-host=host.docker.internal:host-gateway -v pipelines:/app/pipelines --name pipelines --restart always --env-file .env ghcr.io/open-webui/pipelines:latest
 docker run -d -p 3000:8080 -v ~/.ollama:/root/.ollama -v open-webui:/app/backend/data --name open-webui --restart always -e OPENAI_API_BASE_URL=http://host.docker.internal:9099 -e OPENAI_API_KEY=0p3n-w3bu! ghcr.io/open-webui/open-webui:ollama
\ No newline at end of file
diff --git a/examples/filters/datadog_filter_pipeline.py b/examples/filters/datadog_filter_pipeline.py
index 6829b6c..af1d2de 100644
--- a/examples/filters/datadog_filter_pipeline.py
+++ b/examples/filters/datadog_filter_pipeline.py
@@ -109,8 +109,6 @@ class Pipeline:
 
     async def outlet(self, body: dict, user: Optional[dict] = None) -> dict:
         print(f"outlet:{__name__}")
-        if body["chat_id"] not in self.chat_generations:
-            return body
 
         self.LLMObs.annotate(
             span=self.llm_span,
diff --git a/requirements.txt b/requirements.txt
index 70a9145..c5700a7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -29,7 +29,6 @@ chromadb
 
 # Observability
 langfuse
-#git+https://github.com/DataDog/dd-trace-py.git@main
 ddtrace
 
 # ML libraries