mirror of
https://github.com/open-webui/pipelines
synced 2025-05-16 10:25:43 +00:00
Merge branch 'open-webui:main' into routellm-pipeline
This commit is contained in:
commit
7168ea3d08
81
examples/filters/presidio_filter_pipeline.py
Normal file
81
examples/filters/presidio_filter_pipeline.py
Normal file
@ -0,0 +1,81 @@
|
||||
"""
|
||||
title: Presidio PII Redaction Pipeline
|
||||
author: justinh-rahb
|
||||
date: 2024-07-07
|
||||
version: 0.1.0
|
||||
license: MIT
|
||||
description: A pipeline for redacting personally identifiable information (PII) using the Presidio library.
|
||||
requirements: presidio-analyzer, presidio-anonymizer
|
||||
"""
|
||||
|
||||
import os
|
||||
from typing import List, Optional
|
||||
from pydantic import BaseModel
|
||||
from schemas import OpenAIChatMessage
|
||||
from presidio_analyzer import AnalyzerEngine
|
||||
from presidio_anonymizer import AnonymizerEngine
|
||||
from presidio_anonymizer.entities import OperatorConfig
|
||||
|
||||
class Pipeline:
    """Filter pipeline that redacts PII from user messages with Presidio.

    Every ``user``-role message in the request body is run through the
    Presidio analyzer; each detected entity is replaced by the literal
    ``[REDACTED]``. Behaviour is configured through ``Valves``, whose initial
    values are read from ``PII_REDACT_*`` environment variables.
    """

    class Valves(BaseModel):
        # Pipelines this filter attaches to; ["*"] connects to all pipelines.
        pipelines: List[str] = ["*"]
        # NOTE(review): presumably the ordering of this filter relative to
        # other filters -- confirm against the pipelines framework.
        priority: int = 0
        # When False, requests whose user role is "admin" are passed through
        # without redaction.
        enabled_for_admins: bool = False
        # Entity names handed to the Presidio analyzer.
        entities_to_redact: List[str] = [
            "PERSON", "EMAIL_ADDRESS", "PHONE_NUMBER", "US_SSN",
            "CREDIT_CARD", "IP_ADDRESS", "US_PASSPORT", "LOCATION",
            "DATE_TIME", "NRP", "MEDICAL_LICENSE", "URL",
        ]
        # Language code handed to the Presidio analyzer.
        language: str = "en"

    def __init__(self):
        """Build the valves from the environment and create the Presidio engines."""
        self.type = "filter"
        self.name = "Presidio PII Redaction Pipeline"

        # An unset or effectively empty PII_REDACT_ENTITIES falls back to the
        # built-in defaults rather than producing an empty/blank entity list.
        entities = (
            self._split_csv(os.getenv("PII_REDACT_ENTITIES", ""))
            or list(self.Valves().entities_to_redact)
        )

        self.valves = self.Valves(
            pipelines=self._split_csv(os.getenv("PII_REDACT_PIPELINES", "*")),
            enabled_for_admins=(
                os.getenv("PII_REDACT_ENABLED_FOR_ADMINS", "false").strip().lower()
                == "true"
            ),
            entities_to_redact=entities,
            language=os.getenv("PII_REDACT_LANGUAGE", "en").strip(),
        )

        self.analyzer = AnalyzerEngine()
        self.anonymizer = AnonymizerEngine()

    @staticmethod
    def _split_csv(raw: str) -> List[str]:
        """Split a comma-separated string, trimming whitespace and dropping blanks."""
        return [item.strip() for item in raw.split(",") if item.strip()]

    async def on_startup(self):
        """Log that the pipeline module has started."""
        print(f"on_startup:{__name__}")

    async def on_shutdown(self):
        """Log that the pipeline module is shutting down."""
        print(f"on_shutdown:{__name__}")

    def redact_pii(self, text: str) -> str:
        """Return ``text`` with every configured entity replaced by ``[REDACTED]``.

        Args:
            text: The text to scan.

        Returns:
            The anonymized text; empty input is returned unchanged.
        """
        if not text:
            return text

        findings = self.analyzer.analyze(
            text=text,
            language=self.valves.language,
            entities=self.valves.entities_to_redact,
        )
        anonymized = self.anonymizer.anonymize(
            text=text,
            analyzer_results=findings,
            operators={
                "DEFAULT": OperatorConfig("replace", {"new_value": "[REDACTED]"})
            },
        )
        return anonymized.text

    async def inlet(self, body: dict, user: Optional[dict] = None) -> dict:
        """Redact PII in the user messages of an incoming request body.

        Args:
            body: Request payload; its ``messages`` list is modified in place.
            user: The requesting user, if known. Users with role ``admin`` are
                skipped unless ``valves.enabled_for_admins`` is set.

        Returns:
            The same ``body`` object, with user message content redacted.
        """
        # Deliberately do not print `body` or `user` here: they hold the
        # un-redacted PII this filter exists to remove, and would leak to logs.
        print(f"pipe:{__name__}")

        is_admin = user is not None and user.get("role") == "admin"
        if is_admin and not self.valves.enabled_for_admins:
            return body

        for message in body.get("messages") or []:
            if message.get("role") != "user":
                continue

            content = message.get("content")
            if isinstance(content, str):
                message["content"] = self.redact_pii(content)
            elif isinstance(content, list):
                # Multi-part content: redact only the textual parts and leave
                # other parts (e.g. images) untouched.
                for part in content:
                    if (
                        isinstance(part, dict)
                        and part.get("type") == "text"
                        and isinstance(part.get("text"), str)
                    ):
                        part["text"] = self.redact_pii(part["text"])

        return body
|
@ -38,15 +38,15 @@ class Pipeline:
|
||||
# Initialize
|
||||
self.valves = self.Valves(
|
||||
**{
|
||||
"pipelines": ["*"], # Connect to all pipelines
|
||||
"DB_HOST": os.environ["PG_HOST"], # Database hostname
|
||||
"DB_PORT": os.environ["PG_PORT"], # Database port
|
||||
"DB_USER": os.environ["PG_USER"], # User to connect to the database with
|
||||
"DB_PASSWORD": os.environ["PG_PASSWORD"], # Password to connect to the database with
|
||||
"DB_DATABASE": os.environ["PG_DB"], # Database to select on the DB instance
|
||||
"DB_TABLES": ["albums"], # Table(s) to run queries against
|
||||
"OLLAMA_HOST": "http://host.docker.internal:11434", # Make sure to update with the URL of your Ollama host, such as http://localhost:11434 or remote server address
|
||||
"TEXT_TO_SQL_MODEL": "phi3:latest" # Model to use for text-to-SQL generation
|
||||
"pipelines": ["*"], # Connect to all pipelines
|
||||
"DB_HOST": os.getenv("PG_HOST", "http://localhost:5432"), # Database hostname
|
||||
"DB_PORT": os.getenv("PG_PORT", 5432), # Database port
|
||||
"DB_USER": os.getenv("PG_USER", "postgres"), # User to connect to the database with
|
||||
"DB_PASSWORD": os.getenv("PG_PASSWORD", "password"), # Password to connect to the database with
|
||||
"DB_DATABASE": os.getenv("PG_DB", "postgres"), # Database to select on the DB instance
|
||||
"DB_TABLES": ["albums"], # Table(s) to run queries against
|
||||
"OLLAMA_HOST": os.getenv("OLLAMA_HOST", "http://host.docker.internal:11434"), # Make sure to update with the URL of your Ollama host, such as http://localhost:11434 or remote server address
|
||||
"TEXT_TO_SQL_MODEL": "phi3:latest" # Model to use for text-to-SQL generation
|
||||
}
|
||||
)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user