mirror of
				https://github.com/open-webui/open-webui
				synced 2025-06-26 18:26:48 +00:00 
			
		
		
		
	Merge pull request #8637 from kahghi/add-gcs-storage-provider
feat:add GCSStorageProvider
This commit is contained in:
		
						commit
						80e004c31f
					
				@ -662,6 +662,9 @@ S3_REGION_NAME = os.environ.get("S3_REGION_NAME", None)
 | 
			
		||||
S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME", None)
 | 
			
		||||
S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL", None)
 | 
			
		||||
 | 
			
		||||
GCS_BUCKET_NAME = os.environ.get("GCS_BUCKET_NAME", None)
 | 
			
		||||
GOOGLE_APPLICATION_CREDENTIALS_JSON = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS_JSON", None)
 | 
			
		||||
 | 
			
		||||
####################################
 | 
			
		||||
# File Upload DIR
 | 
			
		||||
####################################
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
import os
 | 
			
		||||
import shutil
 | 
			
		||||
import json
 | 
			
		||||
from abc import ABC, abstractmethod
 | 
			
		||||
from typing import BinaryIO, Tuple
 | 
			
		||||
 | 
			
		||||
@ -11,9 +12,13 @@ from open_webui.config import (
 | 
			
		||||
    S3_ENDPOINT_URL,
 | 
			
		||||
    S3_REGION_NAME,
 | 
			
		||||
    S3_SECRET_ACCESS_KEY,
 | 
			
		||||
    GCS_BUCKET_NAME,
 | 
			
		||||
    GOOGLE_APPLICATION_CREDENTIALS_JSON,
 | 
			
		||||
    STORAGE_PROVIDER,
 | 
			
		||||
    UPLOAD_DIR,
 | 
			
		||||
)
 | 
			
		||||
from google.cloud import storage
 | 
			
		||||
from google.cloud.exceptions import GoogleCloudError, NotFound
 | 
			
		||||
from open_webui.constants import ERROR_MESSAGES
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -137,15 +142,76 @@ class S3StorageProvider(StorageProvider):
 | 
			
		||||
        # Always delete from local storage
 | 
			
		||||
        LocalStorageProvider.delete_all_files()
 | 
			
		||||
 | 
			
		||||
class GCSStorageProvider(StorageProvider):
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        self.bucket_name = GCS_BUCKET_NAME
 | 
			
		||||
    
 | 
			
		||||
        if GOOGLE_APPLICATION_CREDENTIALS_JSON:
 | 
			
		||||
            self.gcs_client = storage.Client.from_service_account_info(info=json.loads(GOOGLE_APPLICATION_CREDENTIALS_JSON))
 | 
			
		||||
        else:
 | 
			
		||||
            # if no credentials json is provided, credentials will be picked up from the environment
 | 
			
		||||
            # if running on local environment, credentials would be user credentials
 | 
			
		||||
            # if running on a Compute Engine instance, credentials would be from Google Metadata server
 | 
			
		||||
            self.gcs_client = storage.Client()
 | 
			
		||||
        self.bucket = self.gcs_client.bucket(GCS_BUCKET_NAME)
 | 
			
		||||
    
 | 
			
		||||
    def upload_file(self, file: BinaryIO, filename: str) -> Tuple[bytes, str]:
 | 
			
		||||
        """Handles uploading of the file to GCS storage."""
 | 
			
		||||
        contents, file_path = LocalStorageProvider.upload_file(file, filename)
 | 
			
		||||
        try:
 | 
			
		||||
            blob = self.bucket.blob(filename)
 | 
			
		||||
            blob.upload_from_filename(file_path)
 | 
			
		||||
            return contents, "gs://" + self.bucket_name + "/" + filename
 | 
			
		||||
        except GoogleCloudError as e:
 | 
			
		||||
            raise RuntimeError(f"Error uploading file to GCS: {e}")
 | 
			
		||||
 | 
			
		||||
    def get_file(self, file_path:str) -> str:
 | 
			
		||||
        """Handles downloading of the file from GCS storage."""
 | 
			
		||||
        try:
 | 
			
		||||
            filename = file_path.removeprefix("gs://").split("/")[1]
 | 
			
		||||
            local_file_path = f"{UPLOAD_DIR}/{filename}"            
 | 
			
		||||
            blob = self.bucket.get_blob(filename)
 | 
			
		||||
            blob.download_to_filename(local_file_path)
 | 
			
		||||
 | 
			
		||||
            return local_file_path
 | 
			
		||||
        except NotFound as e:
 | 
			
		||||
            raise RuntimeError(f"Error downloading file from GCS: {e}")
 | 
			
		||||
    
 | 
			
		||||
    def delete_file(self, file_path:str) -> None:
 | 
			
		||||
        """Handles deletion of the file from GCS storage."""
 | 
			
		||||
        try:
 | 
			
		||||
            filename = file_path.removeprefix("gs://").split("/")[1]
 | 
			
		||||
            blob = self.bucket.get_blob(filename)
 | 
			
		||||
            blob.delete()
 | 
			
		||||
        except NotFound as e:
 | 
			
		||||
            raise RuntimeError(f"Error deleting file from GCS: {e}")
 | 
			
		||||
        
 | 
			
		||||
        # Always delete from local storage
 | 
			
		||||
        LocalStorageProvider.delete_file(file_path)
 | 
			
		||||
 | 
			
		||||
    def delete_all_files(self) -> None:
 | 
			
		||||
        """Handles deletion of all files from GCS storage."""
 | 
			
		||||
        try:
 | 
			
		||||
            blobs = self.bucket.list_blobs()
 | 
			
		||||
 | 
			
		||||
            for blob in blobs:
 | 
			
		||||
                blob.delete()
 | 
			
		||||
 | 
			
		||||
        except NotFound as e:
 | 
			
		||||
            raise RuntimeError(f"Error deleting all files from GCS: {e}")
 | 
			
		||||
        
 | 
			
		||||
        # Always delete from local storage
 | 
			
		||||
        LocalStorageProvider.delete_all_files()
 | 
			
		||||
 | 
			
		||||
def get_storage_provider(storage_provider: str):
 | 
			
		||||
    if storage_provider == "local":
 | 
			
		||||
        Storage = LocalStorageProvider()
 | 
			
		||||
    elif storage_provider == "s3":
 | 
			
		||||
        Storage = S3StorageProvider()
 | 
			
		||||
    elif storage_provider == "gcs":
 | 
			
		||||
        Storage = GCSStorageProvider()
 | 
			
		||||
    else:
 | 
			
		||||
        raise RuntimeError(f"Unsupported storage provider: {storage_provider}")
 | 
			
		||||
    return Storage
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
Storage = get_storage_provider(STORAGE_PROVIDER)
 | 
			
		||||
 | 
			
		||||
@ -1,10 +1,12 @@
 | 
			
		||||
import io
 | 
			
		||||
 | 
			
		||||
import os
 | 
			
		||||
import boto3
 | 
			
		||||
import pytest
 | 
			
		||||
from botocore.exceptions import ClientError
 | 
			
		||||
from moto import mock_aws
 | 
			
		||||
from open_webui.storage import provider
 | 
			
		||||
from gcp_storage_emulator.server import create_server
 | 
			
		||||
from google.cloud import storage
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def mock_upload_dir(monkeypatch, tmp_path):
 | 
			
		||||
@ -19,6 +21,7 @@ def test_imports():
 | 
			
		||||
    provider.StorageProvider
 | 
			
		||||
    provider.LocalStorageProvider
 | 
			
		||||
    provider.S3StorageProvider
 | 
			
		||||
    provider.GCSStorageProvider
 | 
			
		||||
    provider.Storage
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -27,6 +30,8 @@ def test_get_storage_provider():
 | 
			
		||||
    assert isinstance(Storage, provider.LocalStorageProvider)
 | 
			
		||||
    Storage = provider.get_storage_provider("s3")
 | 
			
		||||
    assert isinstance(Storage, provider.S3StorageProvider)
 | 
			
		||||
    Storage = provider.get_storage_provider("gcs")
 | 
			
		||||
    assert isinstance(Storage, provider.GCSStorageProvider)
 | 
			
		||||
    with pytest.raises(RuntimeError):
 | 
			
		||||
        provider.get_storage_provider("invalid")
 | 
			
		||||
 | 
			
		||||
@ -42,6 +47,7 @@ def test_class_instantiation():
 | 
			
		||||
        Test()
 | 
			
		||||
    provider.LocalStorageProvider()
 | 
			
		||||
    provider.S3StorageProvider()
 | 
			
		||||
    provider.GCSStorageProvider()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestLocalStorageProvider:
 | 
			
		||||
@ -175,3 +181,91 @@ class TestS3StorageProvider:
 | 
			
		||||
        self.Storage.delete_all_files()
 | 
			
		||||
        assert not (upload_dir / self.filename).exists()
 | 
			
		||||
        assert not (upload_dir / self.filename_extra).exists()
 | 
			
		||||
 | 
			
		||||
class TestGCSStorageProvider:
 | 
			
		||||
    Storage = provider.GCSStorageProvider()
 | 
			
		||||
    Storage.bucket_name = "my-bucket"
 | 
			
		||||
    file_content = b"test content"
 | 
			
		||||
    filename = "test.txt"
 | 
			
		||||
    filename_extra = "test_exyta.txt"
 | 
			
		||||
    file_bytesio_empty = io.BytesIO()
 | 
			
		||||
 | 
			
		||||
    @pytest.fixture(scope="class")
 | 
			
		||||
    def setup(self):
 | 
			
		||||
        host, port = "localhost", 9023
 | 
			
		||||
 | 
			
		||||
        server = create_server(host, port, in_memory=True)
 | 
			
		||||
        server.start()
 | 
			
		||||
        os.environ["STORAGE_EMULATOR_HOST"] = f"http://{host}:{port}"
 | 
			
		||||
 | 
			
		||||
        gcs_client = storage.Client()
 | 
			
		||||
        bucket = gcs_client.bucket(self.Storage.bucket_name)
 | 
			
		||||
        bucket.create()
 | 
			
		||||
        self.Storage.gcs_client, self.Storage.bucket = gcs_client, bucket
 | 
			
		||||
        yield 
 | 
			
		||||
        bucket.delete(force=True)
 | 
			
		||||
        server.stop()
 | 
			
		||||
    
 | 
			
		||||
    def test_upload_file(self, monkeypatch, tmp_path, setup):
 | 
			
		||||
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
 | 
			
		||||
        # catch error if bucket does not exist
 | 
			
		||||
        with pytest.raises(Exception):
 | 
			
		||||
            self.Storage.bucket = monkeypatch(self.Storage, "bucket", None)  
 | 
			
		||||
            self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
 | 
			
		||||
        contents, gcs_file_path = self.Storage.upload_file(
 | 
			
		||||
            io.BytesIO(self.file_content), self.filename
 | 
			
		||||
        )
 | 
			
		||||
        object = self.Storage.bucket.get_blob(self.filename)
 | 
			
		||||
        assert self.file_content == object.download_as_bytes()
 | 
			
		||||
        # local checks
 | 
			
		||||
        assert (upload_dir / self.filename).exists()
 | 
			
		||||
        assert (upload_dir / self.filename).read_bytes() == self.file_content
 | 
			
		||||
        assert contents == self.file_content
 | 
			
		||||
        assert gcs_file_path == "gs://" + self.Storage.bucket_name + "/" + self.filename
 | 
			
		||||
        # test error if file is empty
 | 
			
		||||
        with pytest.raises(ValueError):
 | 
			
		||||
            self.Storage.upload_file(self.file_bytesio_empty, self.filename)
 | 
			
		||||
 | 
			
		||||
    def test_get_file(self, monkeypatch, tmp_path, setup):
 | 
			
		||||
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
 | 
			
		||||
        contents, gcs_file_path = self.Storage.upload_file(
 | 
			
		||||
            io.BytesIO(self.file_content), self.filename
 | 
			
		||||
        )
 | 
			
		||||
        file_path = self.Storage.get_file(gcs_file_path)
 | 
			
		||||
        assert file_path == str(upload_dir / self.filename)
 | 
			
		||||
        assert (upload_dir / self.filename).exists()
 | 
			
		||||
 | 
			
		||||
    def test_delete_file(self, monkeypatch, tmp_path, setup):
 | 
			
		||||
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
 | 
			
		||||
        contents, gcs_file_path = self.Storage.upload_file(
 | 
			
		||||
            io.BytesIO(self.file_content), self.filename
 | 
			
		||||
        )
 | 
			
		||||
        # ensure that local directory has the uploaded file as well
 | 
			
		||||
        assert (upload_dir / self.filename).exists()
 | 
			
		||||
        assert self.Storage.bucket.get_blob(self.filename).name == self.filename
 | 
			
		||||
        self.Storage.delete_file(gcs_file_path)
 | 
			
		||||
        # check that deleting file from gcs will delete the local file as well
 | 
			
		||||
        assert not (upload_dir / self.filename).exists()
 | 
			
		||||
        assert self.Storage.bucket.get_blob(self.filename) == None
 | 
			
		||||
 | 
			
		||||
    def test_delete_all_files(self, monkeypatch, tmp_path, setup):
 | 
			
		||||
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
 | 
			
		||||
        # create 2 files
 | 
			
		||||
        self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
 | 
			
		||||
        object = self.Storage.bucket.get_blob(self.filename)
 | 
			
		||||
        assert (upload_dir / self.filename).exists()
 | 
			
		||||
        assert (upload_dir / self.filename).read_bytes() == self.file_content
 | 
			
		||||
        assert self.Storage.bucket.get_blob(self.filename).name == self.filename
 | 
			
		||||
        assert self.file_content == object.download_as_bytes()
 | 
			
		||||
        self.Storage.upload_file(io.BytesIO(self.file_content), self.filename_extra)
 | 
			
		||||
        object = self.Storage.bucket.get_blob(self.filename_extra)
 | 
			
		||||
        assert (upload_dir / self.filename_extra).exists()
 | 
			
		||||
        assert (upload_dir / self.filename_extra).read_bytes() == self.file_content
 | 
			
		||||
        assert self.Storage.bucket.get_blob(self.filename_extra).name == self.filename_extra
 | 
			
		||||
        assert self.file_content == object.download_as_bytes()
 | 
			
		||||
 | 
			
		||||
        self.Storage.delete_all_files()
 | 
			
		||||
        assert not (upload_dir / self.filename).exists()
 | 
			
		||||
        assert not (upload_dir / self.filename_extra).exists()
 | 
			
		||||
        assert self.Storage.bucket.get_blob(self.filename) == None
 | 
			
		||||
        assert self.Storage.bucket.get_blob(self.filename_extra) == None
 | 
			
		||||
 | 
			
		||||
@ -102,6 +102,7 @@ pytest~=8.3.2
 | 
			
		||||
pytest-docker~=3.1.1
 | 
			
		||||
 | 
			
		||||
googleapis-common-protos==1.63.2
 | 
			
		||||
google-cloud-storage==2.19.0
 | 
			
		||||
 | 
			
		||||
## LDAP
 | 
			
		||||
ldap3==2.9.1
 | 
			
		||||
 | 
			
		||||
@ -102,8 +102,9 @@ dependencies = [
 | 
			
		||||
    "moto[s3]>=5.0.26",
 | 
			
		||||
 | 
			
		||||
    "googleapis-common-protos==1.63.2",
 | 
			
		||||
 | 
			
		||||
    "ldap3==2.9.1"
 | 
			
		||||
    "ldap3==2.9.1",
 | 
			
		||||
    "google-cloud-storage==2.19.0",
 | 
			
		||||
    "gcp-storage-emulator>=2024.8.3",
 | 
			
		||||
]
 | 
			
		||||
readme = "README.md"
 | 
			
		||||
requires-python = ">= 3.11, < 3.13.0a1"
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user