From 58d64c4fcec2c6a20f71e645cc14014b68b24b39 Mon Sep 17 00:00:00 2001
From: Andrew Tait Gehrhardt
Date: Tue, 18 Jun 2024 18:46:01 -0400
Subject: [PATCH 1/4] Pipeline that automatically extracts image, passes to an
 ollama vision model, and overrides the response.

This allows for you to have a primary text model and temporarily switch to
a vision model whenever there is an image attached
---
 .../dynamic_ollama_vision_filter_pipeline.py | 96 +++++++++++++++++++
 1 file changed, 96 insertions(+)
 create mode 100644 examples/filters/dynamic_ollama_vision_filter_pipeline.py

diff --git a/examples/filters/dynamic_ollama_vision_filter_pipeline.py b/examples/filters/dynamic_ollama_vision_filter_pipeline.py
new file mode 100644
index 0000000..d6d28ad
--- /dev/null
+++ b/examples/filters/dynamic_ollama_vision_filter_pipeline.py
@@ -0,0 +1,96 @@
+from typing import List, Optional
+from pydantic import BaseModel
+import json
+import aiohttp
+from utils.pipelines.main import get_last_user_message, get_last_assistant_message
+
+class Pipeline:
+    class Valves(BaseModel):
+        pipelines: List[str] = []
+        priority: int = 0
+        target_user_roles: List[str] = ["admin", "user"]
+        vision_model: str = "llava"
+        ollama_base_url: str = "http://host.docker.interal:11434"
+
+    def __init__(self):
+        self.type = "filter"
+        self.name = "Interception Filter"
+        self.valves = self.Valves(
+            **{
+                "pipelines": ["*"], # Connect to all pipelines
+            }
+        )
+
+    async def on_startup(self):
+        print(f"on_startup:{__name__}")
+        pass
+
+    async def on_shutdown(self):
+        print(f"on_shutdown:{__name__}")
+        pass
+
+    async def process_images_with_llava(self, images: List[str], content: str, vision_model: str, ollama_base_url: str) -> str:
+        url = f"{ollama_base_url}/api/chat"
+        payload = {
+            "model": vision_model,
+            "messages": [
+                {
+                    "role": "user",
+                    "content": content,
+                    "images": images
+                }
+            ]
+        }
+
+        async with aiohttp.ClientSession() as session:
+            async with session.post(url, json=payload) as response:
+                if response.status == 200:
+                    content = []
+                    async for line in response.content:
+                        data = json.loads(line)
+                        content.append(data.get("message", {}).get("content", ""))
+                    return "".join(content)
+                else:
+                    print(f"Failed to process images with LLava, status code: {response.status}")
+                    return ""
+
+    async def inlet(self, body: dict, user: Optional[dict] = None) -> dict:
+        print(f"pipe:{__name__}")
+
+        images = []
+
+        # Ensure the body is a dictionary
+        if isinstance(body, str):
+            body = json.loads(body)
+
+        if user and user.get("role", "admin") in self.valves.target_user_roles:
+            messages = body.get("messages", [])
+            for message in messages:
+                if "images" in message:
+                    images.extend(message["images"])
+
+            # Get the content of the most recent message
+            if messages:
+                user_message = get_last_user_message(body["messages"])
+                print("CURRENT MESSAGE:", user_message)
+
+            # Process the images with LLava
+            if images:
+                print("IMAGES: True")
+                llava_response = await self.process_images_with_llava(images, user_message, self.valves.vision_model,self.valves.ollama_base_url)
+                print("LLAVA RESPONSE:", llava_response)
+
+                # Override the content for the user role
+                for message in messages:
+                    if message.get("role") == "user":
+                        message["content"] = llava_response
+
+            else:
+                print("IMAGES: False")
+
+        print(f"""
+            THIS IS THE BODY OBJECT:
+            {body}
+            """)
+
+        return body

From cc936574e5f7fba77aadc8e8f8180751f3251eb9 Mon Sep 17 00:00:00 2001
From: Andrew Tait Gehrhardt
Date: Tue, 18 Jun 2024 18:46:58 -0400
Subject: [PATCH 2/4] Adding metadata

---
 .../filters/dynamic_ollama_vision_filter_pipeline.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/examples/filters/dynamic_ollama_vision_filter_pipeline.py b/examples/filters/dynamic_ollama_vision_filter_pipeline.py
index d6d28ad..bed4a6c 100644
--- a/examples/filters/dynamic_ollama_vision_filter_pipeline.py
+++ b/examples/filters/dynamic_ollama_vision_filter_pipeline.py
@@ -1,3 +1,13 @@
+"""
+title: HomeAssistant Filter Pipeline
+author: Andrew Tait Gehrhardt
+date: 2024-06-15
+version: 1.0
+license: MIT
+description: A pipeline for controlling Home Assistant entities based on their easy names. Only supports lights at the moment.
+requirements: pydantic, aiohttp
+"""
+
 from typing import List, Optional
 from pydantic import BaseModel
 import json

From 7d817b6b6abe7ffbc7b7bdbde1f52a6666476943 Mon Sep 17 00:00:00 2001
From: Andrew Tait Gehrhardt
Date: Tue, 18 Jun 2024 21:05:18 -0400
Subject: [PATCH 3/4] Fixed metadata. Fixed image retention.

---
 .../dynamic_ollama_vision_filter_pipeline.py | 47 +++++++++----------
 1 file changed, 21 insertions(+), 26 deletions(-)

diff --git a/examples/filters/dynamic_ollama_vision_filter_pipeline.py b/examples/filters/dynamic_ollama_vision_filter_pipeline.py
index bed4a6c..d5458cd 100644
--- a/examples/filters/dynamic_ollama_vision_filter_pipeline.py
+++ b/examples/filters/dynamic_ollama_vision_filter_pipeline.py
@@ -1,10 +1,10 @@
 """
-title: HomeAssistant Filter Pipeline
+title: Ollama Dynamic Vision Pipeline
 author: Andrew Tait Gehrhardt
-date: 2024-06-15
+date: 2024-06-18
 version: 1.0
 license: MIT
-description: A pipeline for controlling Home Assistant entities based on their easy names. Only supports lights at the moment.
+description: A pipeline for dynamically processing images when current model is a text only model
 requirements: pydantic, aiohttp
 """
 
@@ -12,15 +12,15 @@ from typing import List, Optional
 from pydantic import BaseModel
 import json
 import aiohttp
-from utils.pipelines.main import get_last_user_message, get_last_assistant_message
+from utils.pipelines.main import get_last_user_message
 
 class Pipeline:
     class Valves(BaseModel):
         pipelines: List[str] = []
         priority: int = 0
-        target_user_roles: List[str] = ["admin", "user"]
         vision_model: str = "llava"
-        ollama_base_url: str = "http://host.docker.interal:11434"
+        ollama_base_url: str = ""
+        model_to_override: str = ""
 
     def __init__(self):
         self.type = "filter"
@@ -72,31 +72,26 @@ class Pipeline:
         # Ensure the body is a dictionary
         if isinstance(body, str):
             body = json.loads(body)
+
+        model = body.get("model", "")
+        print(f"MODEL NAME: {model}")
 
-        if user and user.get("role", "admin") in self.valves.target_user_roles:
+        # Get the content of the most recent message
+        user_message = get_last_user_message(body["messages"])
+        print("CURRENT MESSAGE:", user_message)
+
+        if model in self.valves.model_to_override:
             messages = body.get("messages", [])
             for message in messages:
                 if "images" in message:
                     images.extend(message["images"])
-
-            # Get the content of the most recent message
-            if messages:
-                user_message = get_last_user_message(body["messages"])
-                print("CURRENT MESSAGE:", user_message)
-
-            # Process the images with LLava
-            if images:
-                print("IMAGES: True")
-                llava_response = await self.process_images_with_llava(images, user_message, self.valves.vision_model,self.valves.ollama_base_url)
-                print("LLAVA RESPONSE:", llava_response)
-
-                # Override the content for the user role
-                for message in messages:
-                    if message.get("role") == "user":
-                        message["content"] = llava_response
-
-            else:
-                print("IMAGES: False")
+                    print("IMAGES: True")
+                    llava_response = await self.process_images_with_llava(images, user_message, self.valves.vision_model,self.valves.ollama_base_url)
+                    message["content"] = llava_response
+                    print("LLAVA RESPONSE:", llava_response)
+                    message.pop("images", None) # This will safely remove the 'images' key if it exists
+                else:
+                    print("IMAGES: False")
 
         print(f"""
             THIS IS THE BODY OBJECT:
             {body}
             """)

From c360d2bda8fc518d2ba5c7be5b51ffbe1dede4cb Mon Sep 17 00:00:00 2001
From: Andrew Tait Gehrhardt
Date: Tue, 18 Jun 2024 21:12:47 -0400
Subject: [PATCH 4/4] Cleaning up

---
 .../dynamic_ollama_vision_filter_pipeline.py | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/examples/filters/dynamic_ollama_vision_filter_pipeline.py b/examples/filters/dynamic_ollama_vision_filter_pipeline.py
index d5458cd..9eb01d2 100644
--- a/examples/filters/dynamic_ollama_vision_filter_pipeline.py
+++ b/examples/filters/dynamic_ollama_vision_filter_pipeline.py
@@ -74,28 +74,18 @@ class Pipeline:
             body = json.loads(body)
 
         model = body.get("model", "")
-        print(f"MODEL NAME: {model}")
 
         # Get the content of the most recent message
         user_message = get_last_user_message(body["messages"])
-        print("CURRENT MESSAGE:", user_message)
 
         if model in self.valves.model_to_override:
             messages = body.get("messages", [])
            for message in messages:
                 if "images" in message:
                     images.extend(message["images"])
-                    print("IMAGES: True")
-                    llava_response = await self.process_images_with_llava(images, user_message, self.valves.vision_model,self.valves.ollama_base_url)
+                    raw_llava_response = await self.process_images_with_llava(images, user_message, self.valves.vision_model,self.valves.ollama_base_url)
+                    llava_response = f"REPEAT THIS BACK: {raw_llava_response}"
                     message["content"] = llava_response
-                    print("LLAVA RESPONSE:", llava_response)
                     message.pop("images", None) # This will safely remove the 'images' key if it exists
-                else:
-                    print("IMAGES: False")
-
-        print(f"""
-            THIS IS THE BODY OBJECT:
-            {body}
-            """)
 
         return body