From f1800e2853e9f896392ad2fe84c029bafe545998 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Jan 2025 02:14:45 +0000 Subject: [PATCH 01/45] build(deps): bump pyjwt[crypto] from 2.9.0 to 2.10.1 in /backend Bumps [pyjwt[crypto]](https://github.com/jpadilla/pyjwt) from 2.9.0 to 2.10.1. - [Release notes](https://github.com/jpadilla/pyjwt/releases) - [Changelog](https://github.com/jpadilla/pyjwt/blob/master/CHANGELOG.rst) - [Commits](https://github.com/jpadilla/pyjwt/compare/2.9.0...2.10.1) --- updated-dependencies: - dependency-name: pyjwt[crypto] dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index e38624879..4b602e639 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -78,7 +78,7 @@ rank-bm25==0.2.2 faster-whisper==1.0.3 -PyJWT[crypto]==2.9.0 +PyJWT[crypto]==2.10.1 authlib==1.3.2 black==24.8.0 From 236410cd9594be4ade0e9043a9addef58bde166c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Jan 2025 02:14:48 +0000 Subject: [PATCH 02/45] build(deps): bump fpdf2 from 2.7.9 to 2.8.2 in /backend Bumps [fpdf2](https://github.com/py-pdf/fpdf2) from 2.7.9 to 2.8.2. - [Release notes](https://github.com/py-pdf/fpdf2/releases) - [Changelog](https://github.com/py-pdf/fpdf2/blob/master/CHANGELOG.md) - [Commits](https://github.com/py-pdf/fpdf2/compare/2.7.9...2.8.2) --- updated-dependencies: - dependency-name: fpdf2 dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index e38624879..e80d9fde1 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -55,7 +55,7 @@ einops==0.8.0 ftfy==6.2.3 pypdf==4.3.1 -fpdf2==2.7.9 +fpdf2==2.8.2 pymdown-extensions==10.11.2 docx2txt==0.8 python-pptx==1.0.0 From 5bb308a0983b15353f8ada4c7791deb8e454684f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Jan 2025 02:14:51 +0000 Subject: [PATCH 03/45] build(deps): bump validators from 0.33.0 to 0.34.0 in /backend Bumps [validators](https://github.com/python-validators/validators) from 0.33.0 to 0.34.0. - [Release notes](https://github.com/python-validators/validators/releases) - [Changelog](https://github.com/python-validators/validators/blob/master/CHANGES.md) - [Commits](https://github.com/python-validators/validators/compare/0.33.0...0.34.0) --- updated-dependencies: - dependency-name: validators dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index e38624879..37a9f76c4 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -67,7 +67,7 @@ pandas==2.2.3 openpyxl==3.1.5 pyxlsb==1.0.10 xlrd==2.0.1 -validators==0.33.0 +validators==0.34.0 psutil sentencepiece soundfile==0.12.1 From b41c1128c6259d78d0598e69af30f947c81b5490 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Jan 2025 02:14:54 +0000 Subject: [PATCH 04/45] build(deps): bump flask from 3.0.3 to 3.1.0 in /backend Bumps [flask](https://github.com/pallets/flask) from 3.0.3 to 3.1.0. - [Release notes](https://github.com/pallets/flask/releases) - [Changelog](https://github.com/pallets/flask/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/flask/compare/3.0.3...3.1.0) --- updated-dependencies: - dependency-name: flask dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index e38624879..98826fd68 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -3,7 +3,7 @@ uvicorn[standard]==0.30.6 pydantic==2.9.2 python-multipart==0.0.18 -Flask==3.0.3 +Flask==3.1.0 Flask-Cors==5.0.0 python-socketio==5.11.3 From 7fb87ddf99daa5d719e39c5834ee88efc09cc49d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Jan 2025 02:15:02 +0000 Subject: [PATCH 05/45] build(deps): bump peewee from 3.17.6 to 3.17.8 in /backend Bumps [peewee](https://github.com/coleifer/peewee) from 3.17.6 to 3.17.8. - [Release notes](https://github.com/coleifer/peewee/releases) - [Changelog](https://github.com/coleifer/peewee/blob/master/CHANGELOG.md) - [Commits](https://github.com/coleifer/peewee/compare/3.17.6...3.17.8) --- updated-dependencies: - dependency-name: peewee dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/requirements.txt b/backend/requirements.txt index e38624879..b6f06a958 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -18,7 +18,7 @@ aiofiles sqlalchemy==2.0.32 alembic==1.14.0 -peewee==3.17.6 +peewee==3.17.8 peewee-migrate==1.12.2 psycopg2-binary==2.9.9 pgvector==0.3.5 From 60fc24cd39270994fdbec481db317b70c788ad3e Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Tue, 31 Dec 2024 19:42:49 -0800 Subject: [PATCH 06/45] refac: channel message input --- src/lib/components/channel/Channel.svelte | 2 +- src/lib/components/channel/MessageInput.svelte | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/lib/components/channel/Channel.svelte b/src/lib/components/channel/Channel.svelte index b205afcb3..68d35a4c6 100644 --- a/src/lib/components/channel/Channel.svelte +++ b/src/lib/components/channel/Channel.svelte @@ -136,7 +136,7 @@ }; const submitHandler = async ({ content, data }) => { - if (!content) { + if (!content && (data?.files ?? []).length === 0) { return; } diff --git a/src/lib/components/channel/MessageInput.svelte b/src/lib/components/channel/MessageInput.svelte index c0605da8c..aabcb2be1 100644 --- a/src/lib/components/channel/MessageInput.svelte +++ b/src/lib/components/channel/MessageInput.svelte @@ -243,7 +243,7 @@ }; const submitHandler = async () => { - if (content === '') { + if (content === '' && files.length === 0) { return; } @@ -581,11 +581,11 @@ From 996ade90904e82dcac7f6db0d79ab5d6c0aed4f7 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Thu, 2 Jan 2025 21:39:40 -0800 Subject: [PATCH 21/45] refac: notification sound behaviour --- src/lib/components/NotificationToast.svelte | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/lib/components/NotificationToast.svelte b/src/lib/components/NotificationToast.svelte index 0cd416d7e..9e8ad907b 100644 --- a/src/lib/components/NotificationToast.svelte +++ b/src/lib/components/NotificationToast.svelte @@ -12,6 +12,10 @@ export let content: string; onMount(() => { + if (!navigator.userActivation.hasBeenActive) { + return; + } + if ($settings?.notificationSound ?? true) { const audio = new Audio(`/audio/notification.mp3`); audio.play(); From 70b74b5217e39b6eccbf5f5daed19f25e3b21a20 Mon Sep 17 00:00:00 2001 From: Jason Kidd Date: Fri, 3 Jan 2025 09:11:09 -0800 Subject: [PATCH 22/45] feat: Allow setting the initial vector length on pgvector document_chunk table --- backend/open_webui/config.py | 3 ++ .../retrieval/vector/dbs/pgvector.py | 40 ++++++++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index d8ea985fe..a48b2db05 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -1211,6 +1211,9 @@ if VECTOR_DB == "pgvector" and not PGVECTOR_DB_URL.startswith("postgres"): raise ValueError( "Pgvector requires setting PGVECTOR_DB_URL or using Postgres with vector extension as the primary database." ) +PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH = int( + os.environ.get("PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH", "1536") +) #################################### # Information Retrieval (RAG) diff --git a/backend/open_webui/retrieval/vector/dbs/pgvector.py b/backend/open_webui/retrieval/vector/dbs/pgvector.py index cb8c545e9..64b6fd6c7 100644 --- a/backend/open_webui/retrieval/vector/dbs/pgvector.py +++ b/backend/open_webui/retrieval/vector/dbs/pgvector.py @@ -5,6 +5,7 @@ from sqlalchemy import ( create_engine, Column, Integer, + MetaData, select, text, Text, @@ -19,9 +20,9 @@ from pgvector.sqlalchemy import Vector from sqlalchemy.ext.mutable import MutableDict from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult -from open_webui.config import PGVECTOR_DB_URL +from open_webui.config import PGVECTOR_DB_URL, PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH -VECTOR_LENGTH = 1536 +VECTOR_LENGTH = PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH Base = declarative_base() @@ -56,6 +57,9 @@ class PgvectorClient: # Ensure the pgvector extension is available self.session.execute(text("CREATE EXTENSION IF NOT EXISTS vector;")) + # Check vector length consistency + self.check_vector_length() + # Create the tables if they do not exist # Base.metadata.create_all requires a bind (engine or connection) # Get the connection from the session @@ -82,6 +86,38 @@ class PgvectorClient: print(f"Error during initialization: {e}") raise + def check_vector_length(self) -> None: + """ + Check if the VECTOR_LENGTH matches the existing vector column dimension in the database. + Raises an exception if there is a mismatch. + """ + metadata = MetaData() + metadata.reflect(bind=self.session.bind, only=["document_chunk"]) + + if "document_chunk" in metadata.tables: + document_chunk_table = metadata.tables["document_chunk"] + if "vector" in document_chunk_table.columns: + vector_column = document_chunk_table.columns["vector"] + vector_type = vector_column.type + if isinstance(vector_type, Vector): + db_vector_length = vector_type.dim + if db_vector_length != VECTOR_LENGTH: + raise Exception( + f"VECTOR_LENGTH {VECTOR_LENGTH} does not match existing vector column dimension {db_vector_length}. " + "Cannot change vector size after initialization without migrating the data." + ) + else: + raise Exception( + "The 'vector' column exists but is not of type 'Vector'." + ) + else: + raise Exception( + "The 'vector' column does not exist in the 'document_chunk' table." + ) + else: + # Table does not exist yet; no action needed + pass + def adjust_vector_length(self, vector: List[float]) -> List[float]: # Adjust vector to have length VECTOR_LENGTH current_length = len(vector) From 2e5fbba961340b2d7fe568dea644401c13db545d Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 3 Jan 2025 14:06:43 -0500 Subject: [PATCH 23/45] Add github action to codespell main on push and PRs --- .github/workflows/codespell.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/codespell.yml diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml new file mode 100644 index 000000000..b23166743 --- /dev/null +++ b/.github/workflows/codespell.yml @@ -0,0 +1,25 @@ +# Codespell configuration is within pyproject.toml +--- +name: Codespell + +on: + push: + branches: [main] + pull_request: + branches: [main] + +permissions: + contents: read + +jobs: + codespell: + name: Check for spelling errors + runs-on: ubuntu-latest + + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Annotate locations with typos + uses: codespell-project/codespell-problem-matcher@v1 + - name: Codespell + uses: codespell-project/actions-codespell@v2 From 1a928bc6a4a49bfe296211e6baf1c9f63d6c6c24 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 3 Jan 2025 14:06:43 -0500 Subject: [PATCH 24/45] Add rudimentary codespell config --- pyproject.toml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index de14a9fa1..2aa69204e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -151,3 +151,10 @@ exclude = [ "chroma.sqlite3", ] force-include = { "CHANGELOG.md" = "open_webui/CHANGELOG.md", build = "open_webui/frontend" } + +[tool.codespell] +# Ref: https://github.com/codespell-project/codespell#using-a-config-file +skip = '.git*,*.svg,package-lock.json,i18n,*.lock,*.css,*-bundle.js,locales,example-doc.txt,emoji-shortcodes.json' +check-hidden = true +# ignore-regex = '' +ignore-words-list = 'ans' From 8f1953e6672851b50df64c54abcc97dbfc438e75 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 3 Jan 2025 14:09:47 -0500 Subject: [PATCH 25/45] [DATALAD RUNCMD] run codespell throughout fixing few left typos automagically === Do not change lines below === { "chain": [], "cmd": "codespell -w", "exit": 0, "extra_inputs": [], "inputs": [], "outputs": [], "pwd": "." } ^^^ Do not change lines above ^^^ --- CODE_OF_CONDUCT.md | 2 +- backend/open_webui/retrieval/web/testdata/brave.json | 2 +- src/lib/components/common/Textarea.svelte | 2 +- src/routes/+layout.js | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index b1c7b56a3..eb54b4894 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -2,7 +2,7 @@ ## Our Pledge -As members, contributors, and leaders of this community, we pledge to make participation in our open-source project a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. +As members, contributors, and leaders of this community, we pledge to make participation in our open-source project a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, religion, or sexual identity and orientation. We are committed to creating and maintaining an open, respectful, and professional environment where positive contributions and meaningful discussions can flourish. By participating in this project, you agree to uphold these values and align your behavior to the standards outlined in this Code of Conduct. diff --git a/backend/open_webui/retrieval/web/testdata/brave.json b/backend/open_webui/retrieval/web/testdata/brave.json index 38487390d..0cc72109e 100644 --- a/backend/open_webui/retrieval/web/testdata/brave.json +++ b/backend/open_webui/retrieval/web/testdata/brave.json @@ -683,7 +683,7 @@ "age": "October 29, 2022", "extra_snippets": [ "You can pass many options to the configure script; run ./configure --help to find out more. On macOS case-insensitive file systems and on Cygwin, the executable is called python.exe; elsewhere it's just python.", - "Building a complete Python installation requires the use of various additional third-party libraries, depending on your build platform and configure options. Not all standard library modules are buildable or useable on all platforms. Refer to the Install dependencies section of the Developer Guide for current detailed information on dependencies for various Linux distributions and macOS.", + "Building a complete Python installation requires the use of various additional third-party libraries, depending on your build platform and configure options. Not all standard library modules are buildable or usable on all platforms. Refer to the Install dependencies section of the Developer Guide for current detailed information on dependencies for various Linux distributions and macOS.", "To get an optimized build of Python, configure --enable-optimizations before you run make. This sets the default make targets up to enable Profile Guided Optimization (PGO) and may be used to auto-enable Link Time Optimization (LTO) on some platforms. For more details, see the sections below.", "Copyright © 2001-2024 Python Software Foundation. All rights reserved." ] diff --git a/src/lib/components/common/Textarea.svelte b/src/lib/components/common/Textarea.svelte index ab5ebe2ac..bcd5b4d75 100644 --- a/src/lib/components/common/Textarea.svelte +++ b/src/lib/components/common/Textarea.svelte @@ -56,7 +56,7 @@