Merge branch 'open-webui:main' into routellm-pipeline

This commit is contained in:
Justin Hayes 2024-07-30 08:50:46 -04:00 committed by GitHub
commit 7168ea3d08
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 90 additions and 9 deletions

View File

@ -0,0 +1,81 @@
"""
title: Presidio PII Redaction Pipeline
author: justinh-rahb
date: 2024-07-07
version: 0.1.0
license: MIT
description: A pipeline for redacting personally identifiable information (PII) using the Presidio library.
requirements: presidio-analyzer, presidio-anonymizer
"""
import os
from typing import List, Optional
from pydantic import BaseModel
from schemas import OpenAIChatMessage
from presidio_analyzer import AnalyzerEngine
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig
class Pipeline:
    """Filter pipeline that redacts PII from user chat messages using Presidio.

    On every inbound request (``inlet``) the text of each ``user`` message is
    analyzed for the configured entity types, and every detected span is
    replaced with the literal ``[REDACTED]``.
    """

    class Valves(BaseModel):
        # Pipelines this filter is connected to; "*" connects to all of them.
        pipelines: List[str] = ["*"]
        # NOTE(review): presumably the ordering of this filter relative to
        # other filters; confirm against the host framework.
        priority: int = 0
        # When False, messages sent by users whose role is "admin" are left
        # untouched.
        enabled_for_admins: bool = False
        # Entity type names handed to the analyzer as ``entities``.
        entities_to_redact: List[str] = [
            "PERSON", "EMAIL_ADDRESS", "PHONE_NUMBER", "US_SSN",
            "CREDIT_CARD", "IP_ADDRESS", "US_PASSPORT", "LOCATION",
            "DATE_TIME", "NRP", "MEDICAL_LICENSE", "URL",
        ]
        # Language code handed to the analyzer as ``language``.
        language: str = "en"

    @staticmethod
    def _csv_env(name: str, default: List[str]) -> List[str]:
        """Read a comma-separated environment variable as a clean list.

        Items are stripped of surrounding whitespace and empty items are
        dropped, so ``"PERSON, EMAIL_ADDRESS,"`` yields
        ``["PERSON", "EMAIL_ADDRESS"]``. Falls back to a copy of ``default``
        when the variable is unset or contains no usable items.
        """
        raw = os.getenv(name)
        if raw is None:
            return list(default)
        items = [item.strip() for item in raw.split(",") if item.strip()]
        return items or list(default)

    def __init__(self):
        """Build the valves from ``PII_REDACT_*`` env vars and create the engines."""
        self.type = "filter"
        self.name = "Presidio PII Redaction Pipeline"

        # A throwaway instance is the simplest way to read the declared
        # defaults without depending on pydantic-version-specific field APIs.
        defaults = self.Valves()
        self.valves = self.Valves(
            pipelines=self._csv_env("PII_REDACT_PIPELINES", defaults.pipelines),
            enabled_for_admins=(
                os.getenv("PII_REDACT_ENABLED_FOR_ADMINS", "false").strip().lower()
                == "true"
            ),
            entities_to_redact=self._csv_env(
                "PII_REDACT_ENTITIES", defaults.entities_to_redact
            ),
            language=os.getenv("PII_REDACT_LANGUAGE", "en"),
        )

        self.analyzer = AnalyzerEngine()
        self.anonymizer = AnonymizerEngine()

    async def on_startup(self):
        """Log that the pipeline module has started."""
        print(f"on_startup:{__name__}")

    async def on_shutdown(self):
        """Log that the pipeline module is shutting down."""
        print(f"on_shutdown:{__name__}")

    def redact_pii(self, text: str) -> str:
        """Return ``text`` with every detected entity replaced by ``[REDACTED]``.

        Detection uses the language and entity list from ``self.valves``.
        Empty input is returned unchanged without invoking the engines.
        """
        if not text:
            return text

        results = self.analyzer.analyze(
            text=text,
            language=self.valves.language,
            entities=self.valves.entities_to_redact,
        )
        anonymized = self.anonymizer.anonymize(
            text=text,
            analyzer_results=results,
            # "DEFAULT" applies the same replacement to every entity type.
            operators={
                "DEFAULT": OperatorConfig("replace", {"new_value": "[REDACTED]"})
            },
        )
        return anonymized.text

    def _redact_content(self, content):
        """Redact a message ``content`` value, whatever its shape.

        Strings are redacted directly. For a list of parts, the ``"text"``
        field of each dict part whose ``"type"`` is ``"text"`` is redacted in
        place. Any other value (``None``, non-text parts, ...) is returned
        untouched rather than being passed to the analyzer.
        """
        if isinstance(content, str):
            return self.redact_pii(content)
        if isinstance(content, list):
            for part in content:
                if (
                    isinstance(part, dict)
                    and part.get("type") == "text"
                    and isinstance(part.get("text"), str)
                ):
                    part["text"] = self.redact_pii(part["text"])
        return content

    async def inlet(self, body: dict, user: Optional[dict] = None) -> dict:
        """Redact PII in the ``user`` messages of an inbound request body.

        Args:
            body: Request payload; its ``"messages"`` list is modified in place.
            user: The requesting user, if known. Users whose ``"role"`` is
                ``"admin"`` are skipped unless ``enabled_for_admins`` is set.

        Returns:
            The same ``body`` object, with user message content redacted.
        """
        # Deliberately do NOT print ``body`` or ``user`` here: at this point
        # the payload still contains the raw PII this filter exists to remove,
        # and writing it to stdout would leak it into the logs.
        print(f"pipe:{__name__}")

        is_admin = user is not None and user.get("role") == "admin"
        if is_admin and not self.valves.enabled_for_admins:
            return body

        for message in body.get("messages") or []:
            if not isinstance(message, dict) or message.get("role") != "user":
                continue
            if "content" in message:
                message["content"] = self._redact_content(message["content"])
        return body

View File

@ -39,13 +39,13 @@ class Pipeline:
self.valves = self.Valves(
**{
"pipelines": ["*"], # Connect to all pipelines
"DB_HOST": os.environ["PG_HOST"], # Database hostname
"DB_PORT": os.environ["PG_PORT"], # Database port
"DB_USER": os.environ["PG_USER"], # User to connect to the database with
"DB_PASSWORD": os.environ["PG_PASSWORD"], # Password to connect to the database with
"DB_DATABASE": os.environ["PG_DB"], # Database to select on the DB instance
"DB_HOST": os.getenv("PG_HOST", "http://localhost:5432"), # Database hostname
"DB_PORT": os.getenv("PG_PORT", 5432), # Database port
"DB_USER": os.getenv("PG_USER", "postgres"), # User to connect to the database with
"DB_PASSWORD": os.getenv("PG_PASSWORD", "password"), # Password to connect to the database with
"DB_DATABASE": os.getenv("PG_DB", "postgres"), # Database to select on the DB instance
"DB_TABLES": ["albums"], # Table(s) to run queries against
"OLLAMA_HOST": "http://host.docker.internal:11434", # Make sure to update with the URL of your Ollama host, such as http://localhost:11434 or remote server address
"OLLAMA_HOST": os.getenv("OLLAMA_HOST", "http://host.docker.internal:11434"), # Make sure to update with the URL of your Ollama host, such as http://localhost:11434 or remote server address
"TEXT_TO_SQL_MODEL": "phi3:latest" # Model to use for text-to-SQL generation
}
)