diff --git a/CHANGELOG.md b/CHANGELOG.md
index a05bb2de7..91da40b9e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,21 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.6.5] - 2025-04-14
+
+### Added
+
+- 🛂 **Granular Voice Feature Permissions Per User Group**: Admins can now separately manage access to Speech-to-Text (record voice), Text-to-Speech (read aloud), and Tool Calls for each user group—giving teams tighter control over voice features and enhanced governance across roles.
+- 🗣️ **Toggle Voice Activity Detection (VAD) for Whisper STT**: New environment variable lets you enable/disable VAD filtering with built-in Whisper speech-to-text, giving you flexibility to optimize for different audio quality and response accuracy levels.
+- 📋 **Copy Formatted Response Mode**: You can now enable “Copy Formatted” in Settings > Interface to copy AI responses exactly as styled (with rich formatting, links, and structure preserved), making it faster and cleaner to paste into documents, emails, or reports.
+- ⚙️ **Backend Stability and Performance Enhancements**: General backend refactoring improves system resilience, consistency, and overall reliability—offering smoother performance across workflows whether chatting, generating media, or using external tools.
+- 🌎 **Translation Refinements Across Multiple Languages**: Updated translations deliver smoother language localization, clearer labels, and improved international usability throughout the UI—ensuring a better experience for non-English speakers.
+
+### Fixed
+
+- 🛠️ **LDAP Login Reliability Restored**: Resolved a critical issue where some LDAP setups failed due to attribute parsing—ensuring consistent, secure, and seamless user authentication across enterprise deployments.
+- 🖼️ **Image Generation in Temporary Chats Now Works Properly**: Fixed a bug where image outputs weren’t generated during temporary chats—visual content can now be used reliably in all chat modes without interruptions.
+
## [0.6.4] - 2025-04-12
### Fixed
diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py
index bd822d06d..3b40977f2 100644
--- a/backend/open_webui/config.py
+++ b/backend/open_webui/config.py
@@ -1062,6 +1062,18 @@ USER_PERMISSIONS_CHAT_EDIT = (
os.environ.get("USER_PERMISSIONS_CHAT_EDIT", "True").lower() == "true"
)
+USER_PERMISSIONS_CHAT_STT = (
+ os.environ.get("USER_PERMISSIONS_CHAT_STT", "True").lower() == "true"
+)
+
+USER_PERMISSIONS_CHAT_TTS = (
+ os.environ.get("USER_PERMISSIONS_CHAT_TTS", "True").lower() == "true"
+)
+
+USER_PERMISSIONS_CHAT_CALL = (
+ os.environ.get("USER_PERMISSIONS_CHAT_CALL", "True").lower() == "true"
+)
+
USER_PERMISSIONS_CHAT_MULTIPLE_MODELS = (
os.environ.get("USER_PERMISSIONS_CHAT_MULTIPLE_MODELS", "True").lower() == "true"
)
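For reference, a minimal standalone sketch of the flag-parsing pattern these new variables follow (a simplified stand-in, not the actual Open WebUI module): each permission stays enabled unless its variable is explicitly set to something other than `true`.

```python
import os

def env_flag(name: str, default: str = "True") -> bool:
    # Same pattern as above: case-insensitive comparison against "true",
    # with the permission enabled by default when the variable is unset.
    return os.environ.get(name, default).lower() == "true"

# Hypothetical usage: disable read-aloud for the default permission set.
os.environ["USER_PERMISSIONS_CHAT_TTS"] = "false"
assert env_flag("USER_PERMISSIONS_CHAT_STT") is True   # unset -> falls back to "True"
assert env_flag("USER_PERMISSIONS_CHAT_TTS") is False  # anything but "true" disables
```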
@@ -1114,6 +1126,9 @@ DEFAULT_USER_PERMISSIONS = {
"file_upload": USER_PERMISSIONS_CHAT_FILE_UPLOAD,
"delete": USER_PERMISSIONS_CHAT_DELETE,
"edit": USER_PERMISSIONS_CHAT_EDIT,
+ "stt": USER_PERMISSIONS_CHAT_STT,
+ "tts": USER_PERMISSIONS_CHAT_TTS,
+ "call": USER_PERMISSIONS_CHAT_CALL,
"multiple_models": USER_PERMISSIONS_CHAT_MULTIPLE_MODELS,
"temporary": USER_PERMISSIONS_CHAT_TEMPORARY,
"temporary_enforced": USER_PERMISSIONS_CHAT_TEMPORARY_ENFORCED,
@@ -2489,6 +2504,13 @@ WHISPER_MODEL_AUTO_UPDATE = (
and os.environ.get("WHISPER_MODEL_AUTO_UPDATE", "").lower() == "true"
)
+WHISPER_VAD_FILTER = PersistentConfig(
+ "WHISPER_VAD_FILTER",
+ "audio.stt.whisper_vad_filter",
+ os.getenv("WHISPER_VAD_FILTER", "False").lower() == "true",
+)
+
+
# Add Deepgram configuration
DEEPGRAM_API_KEY = PersistentConfig(
"DEEPGRAM_API_KEY",
@@ -2496,6 +2518,7 @@ DEEPGRAM_API_KEY = PersistentConfig(
os.getenv("DEEPGRAM_API_KEY", ""),
)
+
AUDIO_STT_OPENAI_API_BASE_URL = PersistentConfig(
"AUDIO_STT_OPENAI_API_BASE_URL",
"audio.stt.openai.api_base_url",
diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py
index 1d1efc5df..56ea17fa1 100644
--- a/backend/open_webui/main.py
+++ b/backend/open_webui/main.py
@@ -166,6 +166,7 @@ from open_webui.config import (
FIRECRAWL_API_KEY,
WEB_LOADER_ENGINE,
WHISPER_MODEL,
+ WHISPER_VAD_FILTER,
DEEPGRAM_API_KEY,
WHISPER_MODEL_AUTO_UPDATE,
WHISPER_MODEL_DIR,
@@ -789,6 +790,7 @@ app.state.config.STT_ENGINE = AUDIO_STT_ENGINE
app.state.config.STT_MODEL = AUDIO_STT_MODEL
app.state.config.WHISPER_MODEL = WHISPER_MODEL
+app.state.config.WHISPER_VAD_FILTER = WHISPER_VAD_FILTER
app.state.config.DEEPGRAM_API_KEY = DEEPGRAM_API_KEY
app.state.config.AUDIO_STT_AZURE_API_KEY = AUDIO_STT_AZURE_API_KEY
@@ -1023,14 +1025,19 @@ async def get_models(request: Request, user=Depends(get_verified_user)):
if "pipeline" in model and model["pipeline"].get("type", None) == "filter":
continue
- model_tags = [
- tag.get("name")
- for tag in model.get("info", {}).get("meta", {}).get("tags", [])
- ]
- tags = [tag.get("name") for tag in model.get("tags", [])]
+ try:
+ model_tags = [
+ tag.get("name")
+ for tag in model.get("info", {}).get("meta", {}).get("tags", [])
+ ]
+ tags = [tag.get("name") for tag in model.get("tags", [])]
- tags = list(set(model_tags + tags))
- model["tags"] = [{"name": tag} for tag in tags]
+ tags = list(set(model_tags + tags))
+ model["tags"] = [{"name": tag} for tag in tags]
+ except Exception as e:
+ log.debug(f"Error processing model tags: {e}")
+ model["tags"] = []
models.append(model)
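The guarded block above merges tags stored under `info.meta.tags` with any top-level `tags` and dedupes them, falling back to an empty list if either shape is unexpected. A standalone sketch of the same merge on a hypothetical model dict:

```python
import logging

log = logging.getLogger(__name__)

def merge_model_tags(model: dict) -> None:
    """Combine meta tags and top-level tags into one deduplicated list."""
    try:
        model_tags = [
            tag.get("name")
            for tag in model.get("info", {}).get("meta", {}).get("tags", [])
        ]
        tags = [tag.get("name") for tag in model.get("tags", [])]
        model["tags"] = [{"name": tag} for tag in set(model_tags + tags)]
    except Exception as e:  # e.g. "tags" holds something other than a list of dicts
        log.debug(f"Error processing model tags: {e}")
        model["tags"] = []

# Hypothetical model with an overlapping tag in both locations.
model = {"info": {"meta": {"tags": [{"name": "vision"}]}}, "tags": [{"name": "vision"}]}
merge_model_tags(model)
print(model["tags"])  # [{'name': 'vision'}]
```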
diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py
index a5447e1fc..da51d1ecf 100644
--- a/backend/open_webui/routers/audio.py
+++ b/backend/open_webui/routers/audio.py
@@ -497,7 +497,11 @@ def transcribe(request: Request, file_path):
)
model = request.app.state.faster_whisper_model
- segments, info = model.transcribe(file_path, beam_size=5)
+ segments, info = model.transcribe(
+ file_path,
+ beam_size=5,
+ vad_filter=request.app.state.config.WHISPER_VAD_FILTER,
+ )
log.info(
"Detected language '%s' with probability %f"
% (info.language, info.language_probability)
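`vad_filter` is a standard faster-whisper `transcribe()` option backed by Silero VAD; a minimal standalone sketch of what the call above does when `WHISPER_VAD_FILTER=true` (model name and audio path are placeholders, not Open WebUI defaults):

```python
from faster_whisper import WhisperModel

model = WhisperModel("base", device="cpu", compute_type="int8")  # placeholder model

segments, info = model.transcribe(
    "sample.wav",      # placeholder path
    beam_size=5,
    vad_filter=True,   # skip non-speech audio before decoding
)

print(f"Detected language '{info.language}' with probability {info.language_probability}")
for segment in segments:
    print(f"[{segment.start:.2f} -> {segment.end:.2f}] {segment.text}")
```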
diff --git a/backend/open_webui/routers/auths.py b/backend/open_webui/routers/auths.py
index 6574ef0b1..9c4d5cb9f 100644
--- a/backend/open_webui/routers/auths.py
+++ b/backend/open_webui/routers/auths.py
@@ -230,13 +230,15 @@ async def ldap_auth(request: Request, response: Response, form_data: LdapForm):
entry = connection_app.entries[0]
username = str(entry[f"{LDAP_ATTRIBUTE_FOR_USERNAME}"]).lower()
- email = entry[f"{LDAP_ATTRIBUTE_FOR_MAIL}"]
+ email = entry[f"{LDAP_ATTRIBUTE_FOR_MAIL}"].value  # retrieve the attribute's value
if not email:
raise HTTPException(400, "User does not have a valid email address.")
elif isinstance(email, str):
email = email.lower()
elif isinstance(email, list):
email = email[0].lower()
+ else:
+ email = str(email).lower()
cn = str(entry["cn"])
user_dn = entry.entry_dn
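In ldap3, indexing an `Entry` returns an `Attribute` object; `str()` gives a display form while `.value` yields the underlying value, which may be a single string or a list for multi-valued attributes. A hedged sketch of the normalization the branches above perform, using a hypothetical helper:

```python
def normalize_mail(raw) -> str | None:
    """Hypothetical helper mirroring the branches above."""
    if not raw:
        return None                 # -> HTTP 400 in the route
    if isinstance(raw, str):
        return raw.lower()
    if isinstance(raw, list):
        return raw[0].lower()       # first value of a multi-valued attribute
    return str(raw).lower()         # anything else, stringified

print(normalize_mail("Alice@Example.COM"))                       # alice@example.com
print(normalize_mail(["Alice@Example.COM", "alt@example.com"]))  # alice@example.com
```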
diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py
index 8f89351ac..13f012483 100644
--- a/backend/open_webui/routers/retrieval.py
+++ b/backend/open_webui/routers/retrieval.py
@@ -1520,20 +1520,20 @@ async def process_web_search(
else:
collection_names = []
for doc_idx, doc in enumerate(docs):
- collection_name = f"web-search-{calculate_sha256_string(form_data.query + '-' + urls[doc_idx])}"[
- :63
- ]
+ if doc and doc.page_content:
+ collection_name = f"web-search-{calculate_sha256_string(form_data.query + '-' + urls[doc_idx])}"[
+ :63
+ ]
- collection_names.append(collection_name)
-
- await run_in_threadpool(
- save_docs_to_vector_db,
- request,
- [doc],
- collection_name,
- overwrite=True,
- user=user,
- )
+ collection_names.append(collection_name)
+ await run_in_threadpool(
+ save_docs_to_vector_db,
+ request,
+ [doc],
+ collection_name,
+ overwrite=True,
+ user=user,
+ )
return {
"status": True,
diff --git a/backend/open_webui/routers/users.py b/backend/open_webui/routers/users.py
index 781676567..a9ac34e2f 100644
--- a/backend/open_webui/routers/users.py
+++ b/backend/open_webui/routers/users.py
@@ -88,6 +88,9 @@ class ChatPermissions(BaseModel):
file_upload: bool = True
delete: bool = True
edit: bool = True
+ stt: bool = True
+ tts: bool = True
+ call: bool = True
multiple_models: bool = True
temporary: bool = True
temporary_enforced: bool = False
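The new model fields default to `True`, so group configurations saved before this change (with no voice keys) keep voice features enabled after upgrading; a short usage sketch:

```python
from pydantic import BaseModel

class ChatPermissions(BaseModel):
    file_upload: bool = True
    delete: bool = True
    edit: bool = True
    stt: bool = True
    tts: bool = True
    call: bool = True
    multiple_models: bool = True
    temporary: bool = True
    temporary_enforced: bool = False

# A pre-existing payload without the new keys still validates,
# and the voice permissions fall back to enabled.
perms = ChatPermissions(**{"file_upload": False, "temporary": False})
print(perms.stt, perms.tts, perms.call)  # True True True

# Explicitly disabling read-aloud for a group:
print(ChatPermissions(tts=False).tts)    # False
```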
diff --git a/backend/open_webui/socket/main.py b/backend/open_webui/socket/main.py
index 2c64d4bf7..282f4db95 100644
--- a/backend/open_webui/socket/main.py
+++ b/backend/open_webui/socket/main.py
@@ -339,16 +339,17 @@ def get_event_emitter(request_info, update_db=True):
request_info["message_id"],
)
- content = message.get("content", "")
- content += event_data.get("data", {}).get("content", "")
+ if message:
+ content = message.get("content", "")
+ content += event_data.get("data", {}).get("content", "")
- Chats.upsert_message_to_chat_by_id_and_message_id(
- request_info["chat_id"],
- request_info["message_id"],
- {
- "content": content,
- },
- )
+ Chats.upsert_message_to_chat_by_id_and_message_id(
+ request_info["chat_id"],
+ request_info["message_id"],
+ {
+ "content": content,
+ },
+ )
if "type" in event_data and event_data["type"] == "replace":
content = event_data.get("data", {}).get("content", "")
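The append-and-persist path now runs only when the message lookup succeeds, so a streamed delta arriving for a missing message no longer raises on `message.get`. A hedged sketch of the guard, with a hypothetical helper:

```python
def apply_stream_delta(message: dict | None, delta: str) -> dict | None:
    """Hypothetical helper: skip both the content append and the chat upsert
    when the target message no longer exists."""
    if not message:
        return None  # nothing to persist
    return {"content": message.get("content", "") + delta}

print(apply_stream_delta(None, "partial token"))     # None
print(apply_stream_delta({"content": "Hel"}, "lo"))  # {'content': 'Hello'}
```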
diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py
index 0f3dc67f5..4070bc697 100644
--- a/backend/open_webui/utils/middleware.py
+++ b/backend/open_webui/utils/middleware.py
@@ -534,13 +534,20 @@ async def chat_image_generation_handler(
}
)
- for image in images:
- await __event_emitter__(
- {
- "type": "message",
- "data": {"content": f"\n"},
- }
- )
+ await __event_emitter__(
+ {
+ "type": "files",
+ "data": {
+ "files": [
+ {
+ "type": "image",
+ "url": image["url"],
+ }
+ for image in images
+ ]
+ },
+ }
+ )
system_message_content = "