From c0a60f1b0f74ff9f010efcdbf0acc1c7681c9846 Mon Sep 17 00:00:00 2001
From: ther3zz <40278044+ther3zz@users.noreply.github.com>
Date: Fri, 28 Mar 2025 10:30:32 -0400
Subject: [PATCH] Model Name vs ID for Generation

Adds the ability to use the model name as the model value when inserting
generation observations into Langfuse. Also adds both the model name and
ID to the generation metadata.
---
 examples/filters/langfuse_filter_pipeline.py | 49 ++++++++++++++++++--
 1 file changed, 45 insertions(+), 4 deletions(-)

diff --git a/examples/filters/langfuse_filter_pipeline.py b/examples/filters/langfuse_filter_pipeline.py
index 48f453a..cd2c0ab 100644
--- a/examples/filters/langfuse_filter_pipeline.py
+++ b/examples/filters/langfuse_filter_pipeline.py
@@ -1,8 +1,8 @@
 """
 title: Langfuse Filter Pipeline
 author: open-webui
-date: 2025-03-04
-version: 1.6
+date: 2025-03-28
+version: 1.7
 license: MIT
 description: A filter pipeline that uses Langfuse.
 requirements: langfuse
@@ -36,6 +36,8 @@ class Pipeline:
         host: str
         # New valve that controls whether task names are added as tags:
         insert_tags: bool = True
+        # New valve that controls whether to use model name instead of model ID for generation
+        use_model_name_instead_of_id_for_generation: bool = False
         debug: bool = False
 
     def __init__(self):
@@ -48,6 +50,7 @@ class Pipeline:
                 "secret_key": os.getenv("LANGFUSE_SECRET_KEY", "your-secret-key-here"),
                 "public_key": os.getenv("LANGFUSE_PUBLIC_KEY", "your-public-key-here"),
                 "host": os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com"),
+                "use_model_name_instead_of_id_for_generation": os.getenv("USE_MODEL_NAME", "false").lower() == "true",
                 "debug": os.getenv("DEBUG_MODE", "false").lower() == "true",
             }
         )
@@ -55,6 +58,8 @@ class Pipeline:
         self.langfuse = None
         self.chat_traces = {}
         self.suppressed_logs = set()
+        # Dictionary to store model names for each chat
+        self.model_names = {}
 
         # Only these tasks will be treated as LLM "generations":
         self.GENERATION_TASKS = {"llm_response"}
@@ -124,6 +129,20 @@ class Pipeline:
         metadata["chat_id"] = chat_id
         body["metadata"] = metadata
 
+        # Extract and store both model name and ID if available
+        model_info = metadata.get("model", {})
+        model_id = body.get("model")
+
+        # Store model information for this chat
+        if chat_id not in self.model_names:
+            self.model_names[chat_id] = {"id": model_id}
+        else:
+            self.model_names[chat_id]["id"] = model_id
+
+        if isinstance(model_info, dict) and "name" in model_info:
+            self.model_names[chat_id]["name"] = model_info["name"]
+            self.log(f"Stored model info - name: '{model_info['name']}', id: '{model_id}' for chat_id: {chat_id}")
+
         required_keys = ["model", "messages"]
         missing_keys = [key for key in required_keys if key not in body]
         if missing_keys:
@@ -169,9 +188,20 @@ class Pipeline:
 
         # If it's a task that is considered an LLM generation
         if task_name in self.GENERATION_TASKS:
+            # Determine which model value to use based on the use_model_name valve
+            model_id = self.model_names.get(chat_id, {}).get("id", body["model"])
+            model_name = self.model_names.get(chat_id, {}).get("name", "unknown")
+
+            # Pick primary model identifier based on valve setting
+            model_value = model_name if self.valves.use_model_name_instead_of_id_for_generation else model_id
+
+            # Add both values to metadata regardless of valve setting
+            metadata["model_id"] = model_id
+            metadata["model_name"] = model_name
+
             generation_payload = {
                 "name": f"{task_name}:{str(uuid.uuid4())}",
-                "model": body["model"],
+                "model": model_value,
                 "input": body["messages"],
                 "metadata": metadata,
             }
@@ -241,10 +271,21 @@ class Pipeline:
         metadata["interface"] = "open-webui"
 
         if task_name in self.GENERATION_TASKS:
+            # Determine which model value to use based on the use_model_name valve
+            model_id = self.model_names.get(chat_id, {}).get("id", body.get("model"))
+            model_name = self.model_names.get(chat_id, {}).get("name", "unknown")
+
+            # Pick primary model identifier based on valve setting
+            model_value = model_name if self.valves.use_model_name_instead_of_id_for_generation else model_id
+
+            # Add both values to metadata regardless of valve setting
+            metadata["model_id"] = model_id
+            metadata["model_name"] = model_name
+
             # If it's an LLM generation
             generation_payload = {
                 "name": f"{task_name}:{str(uuid.uuid4())}",
-                "model": body.get("model"),  # <-- Include the model in LLM generation
+                "model": model_value,  # <-- Use model name or ID based on valve setting
                 "input": body["messages"],
                 "metadata": metadata,
                 "usage": usage,
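
Note for reviewers (explanatory, not part of the patch): the heart of the
change is the valve-driven choice between the human-readable model name and
the model ID when building the generation payload. The standalone Python
sketch below mirrors that fallback logic outside the pipeline; the function
name resolve_generation_model and the sample data are hypothetical, for
illustration only.

    def resolve_generation_model(
        model_names: dict,
        chat_id: str,
        body_model: str,
        use_model_name: bool,
    ) -> str:
        """Mirror the patch's selection: prefer the stored name when the
        valve is on, otherwise fall back to the model ID (which itself
        defaults to the model value from the request body)."""
        entry = model_names.get(chat_id, {})
        model_id = entry.get("id", body_model)
        model_name = entry.get("name", "unknown")
        return model_name if use_model_name else model_id

    # Example: a chat whose inlet stored both identifiers (sample values).
    model_names = {"chat-1": {"id": "gpt-4o-2024-08-06", "name": "GPT-4o"}}

    assert resolve_generation_model(model_names, "chat-1", "gpt-4o-2024-08-06", True) == "GPT-4o"
    assert resolve_generation_model(model_names, "chat-1", "gpt-4o-2024-08-06", False) == "gpt-4o-2024-08-06"
    # Unknown chat: the name degrades to "unknown", the ID falls back to the body's model.
    assert resolve_generation_model(model_names, "chat-2", "llama3", True) == "unknown"

Since both values also land in metadata["model_id"] and metadata["model_name"]
regardless of the valve, the setting only changes which identifier Langfuse
displays as the generation's model; no information is lost either way.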
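One usage note, going only by what the diff shows: the valve defaults to
False and is initialized from the USE_MODEL_NAME environment variable in
__init__, so it can be enabled without touching the Valves UI. A minimal
sketch, assuming the variable is set before Pipeline() is constructed in
the same process:

    import os

    # The patch reads this once in __init__; the accepted value is a
    # case-insensitive "true" (anything else evaluates to False).
    os.environ["USE_MODEL_NAME"] = "true"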