mirror of
https://github.com/open-webui/open-webui
synced 2025-01-22 10:45:47 +00:00
Merge pull request #8637 from kahghi/add-gcs-storage-provider
Some checks are pending
Deploy to HuggingFace Spaces / check-secret (push) Waiting to run
Deploy to HuggingFace Spaces / deploy (push) Blocked by required conditions
Create and publish Docker images with specific build args / build-main-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-main-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / merge-main-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-cuda-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-ollama-images (push) Blocked by required conditions
Python CI / Format Backend (3.11) (push) Waiting to run
Frontend Build / Format & Build Frontend (push) Waiting to run
Frontend Build / Frontend Unit Tests (push) Waiting to run
Integration Test / Run Cypress Integration Tests (push) Waiting to run
Integration Test / Run Migration Tests (push) Waiting to run
Some checks are pending
Deploy to HuggingFace Spaces / check-secret (push) Waiting to run
Deploy to HuggingFace Spaces / deploy (push) Blocked by required conditions
Create and publish Docker images with specific build args / build-main-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-main-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / merge-main-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-cuda-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-ollama-images (push) Blocked by required conditions
Python CI / Format Backend (3.11) (push) Waiting to run
Frontend Build / Format & Build Frontend (push) Waiting to run
Frontend Build / Frontend Unit Tests (push) Waiting to run
Integration Test / Run Cypress Integration Tests (push) Waiting to run
Integration Test / Run Migration Tests (push) Waiting to run
feat: add GCSStorageProvider
This commit is contained in:
commit
80e004c31f
S3_BUCKET_NAME = os.environ.get("S3_BUCKET_NAME", None)
S3_ENDPOINT_URL = os.environ.get("S3_ENDPOINT_URL", None)

# Google Cloud Storage configuration: the bucket to store uploads in and,
# optionally, an inline service-account credentials JSON string. When the
# JSON is not provided, credentials are resolved from the environment
# (application-default credentials or the Compute Engine metadata server).
GCS_BUCKET_NAME = os.environ.get("GCS_BUCKET_NAME", None)
GOOGLE_APPLICATION_CREDENTIALS_JSON = os.environ.get(
    "GOOGLE_APPLICATION_CREDENTIALS_JSON", None
)

####################################
# File Upload DIR
####################################
@ -1,5 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
import json
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import BinaryIO, Tuple
|
from typing import BinaryIO, Tuple
|
||||||
|
|
||||||
@ -11,9 +12,13 @@ from open_webui.config import (
|
|||||||
S3_ENDPOINT_URL,
|
S3_ENDPOINT_URL,
|
||||||
S3_REGION_NAME,
|
S3_REGION_NAME,
|
||||||
S3_SECRET_ACCESS_KEY,
|
S3_SECRET_ACCESS_KEY,
|
||||||
|
GCS_BUCKET_NAME,
|
||||||
|
GOOGLE_APPLICATION_CREDENTIALS_JSON,
|
||||||
STORAGE_PROVIDER,
|
STORAGE_PROVIDER,
|
||||||
UPLOAD_DIR,
|
UPLOAD_DIR,
|
||||||
)
|
)
|
||||||
|
from google.cloud import storage
|
||||||
|
from google.cloud.exceptions import GoogleCloudError, NotFound
|
||||||
from open_webui.constants import ERROR_MESSAGES
|
from open_webui.constants import ERROR_MESSAGES
|
||||||
|
|
||||||
|
|
||||||
@ -137,15 +142,76 @@ class S3StorageProvider(StorageProvider):
|
|||||||
# Always delete from local storage
|
# Always delete from local storage
|
||||||
LocalStorageProvider.delete_all_files()
|
LocalStorageProvider.delete_all_files()
|
||||||
|
|
||||||
|
class GCSStorageProvider(StorageProvider):
    """Storage provider backed by a Google Cloud Storage bucket.

    Uploads are written to local storage (``UPLOAD_DIR``) first and then
    mirrored to GCS; deletions remove both copies.
    """

    def __init__(self):
        # Kept on the instance so tests can point it at an emulator bucket.
        self.bucket_name = GCS_BUCKET_NAME

        if GOOGLE_APPLICATION_CREDENTIALS_JSON:
            # Explicit service-account credentials supplied as a JSON string.
            self.gcs_client = storage.Client.from_service_account_info(
                info=json.loads(GOOGLE_APPLICATION_CREDENTIALS_JSON)
            )
        else:
            # If no credentials JSON is provided, credentials are picked up
            # from the environment:
            #  - on a local environment: user (application-default) credentials
            #  - on a Compute Engine instance: the Google metadata server
            self.gcs_client = storage.Client()
        self.bucket = self.gcs_client.bucket(GCS_BUCKET_NAME)

    def upload_file(self, file: BinaryIO, filename: str) -> Tuple[bytes, str]:
        """Handles uploading of the file to GCS storage.

        Returns:
            Tuple of (file contents, ``gs://<bucket>/<filename>`` path).

        Raises:
            RuntimeError: if the upload to GCS fails.
        """
        # Local upload happens first; it also rejects empty files.
        contents, file_path = LocalStorageProvider.upload_file(file, filename)
        try:
            blob = self.bucket.blob(filename)
            blob.upload_from_filename(file_path)
            return contents, "gs://" + self.bucket_name + "/" + filename
        except GoogleCloudError as e:
            raise RuntimeError(f"Error uploading file to GCS: {e}")

    def get_file(self, file_path: str) -> str:
        """Handles downloading of the file from GCS storage.

        Returns:
            The local path the object was downloaded to.

        Raises:
            RuntimeError: if the object does not exist in the bucket.
        """
        try:
            # Strip the scheme and bucket segment; split only once so object
            # names that themselves contain "/" are preserved intact.
            filename = file_path.removeprefix("gs://").split("/", 1)[1]
            local_file_path = f"{UPLOAD_DIR}/{filename}"
            blob = self.bucket.get_blob(filename)
            blob.download_to_filename(local_file_path)

            return local_file_path
        except NotFound as e:
            raise RuntimeError(f"Error downloading file from GCS: {e}")

    def delete_file(self, file_path: str) -> None:
        """Handles deletion of the file from GCS storage (and locally).

        Raises:
            RuntimeError: if the object does not exist in the bucket.
        """
        try:
            # Split once so nested object names are not truncated.
            filename = file_path.removeprefix("gs://").split("/", 1)[1]
            blob = self.bucket.get_blob(filename)
            blob.delete()
        except NotFound as e:
            raise RuntimeError(f"Error deleting file from GCS: {e}")

        # Always delete from local storage
        LocalStorageProvider.delete_file(file_path)

    def delete_all_files(self) -> None:
        """Handles deletion of all files from GCS storage (and locally).

        Raises:
            RuntimeError: if a listed object vanishes mid-iteration.
        """
        try:
            for blob in self.bucket.list_blobs():
                blob.delete()
        except NotFound as e:
            raise RuntimeError(f"Error deleting all files from GCS: {e}")

        # Always delete from local storage
        LocalStorageProvider.delete_all_files()
def get_storage_provider(storage_provider: str):
    """Instantiate and return the storage backend named by *storage_provider*.

    Supported keys: ``"local"``, ``"s3"``, ``"gcs"``.

    Raises:
        RuntimeError: for any unrecognized provider key.
    """
    provider_classes = {
        "local": LocalStorageProvider,
        "s3": S3StorageProvider,
        "gcs": GCSStorageProvider,
    }
    if storage_provider not in provider_classes:
        raise RuntimeError(f"Unsupported storage provider: {storage_provider}")
    return provider_classes[storage_provider]()


# Module-level singleton selected by configuration.
Storage = get_storage_provider(STORAGE_PROVIDER)
|
@ -1,10 +1,12 @@
|
|||||||
import io
|
import io
|
||||||
|
import os
|
||||||
import boto3
|
import boto3
|
||||||
import pytest
|
import pytest
|
||||||
from botocore.exceptions import ClientError
|
from botocore.exceptions import ClientError
|
||||||
from moto import mock_aws
|
from moto import mock_aws
|
||||||
from open_webui.storage import provider
|
from open_webui.storage import provider
|
||||||
|
from gcp_storage_emulator.server import create_server
|
||||||
|
from google.cloud import storage
|
||||||
|
|
||||||
|
|
||||||
def mock_upload_dir(monkeypatch, tmp_path):
|
def mock_upload_dir(monkeypatch, tmp_path):
|
||||||
@ -19,6 +21,7 @@ def test_imports():
|
|||||||
provider.StorageProvider
|
provider.StorageProvider
|
||||||
provider.LocalStorageProvider
|
provider.LocalStorageProvider
|
||||||
provider.S3StorageProvider
|
provider.S3StorageProvider
|
||||||
|
provider.GCSStorageProvider
|
||||||
provider.Storage
|
provider.Storage
|
||||||
|
|
||||||
|
|
||||||
@ -27,6 +30,8 @@ def test_get_storage_provider():
|
|||||||
assert isinstance(Storage, provider.LocalStorageProvider)
|
assert isinstance(Storage, provider.LocalStorageProvider)
|
||||||
Storage = provider.get_storage_provider("s3")
|
Storage = provider.get_storage_provider("s3")
|
||||||
assert isinstance(Storage, provider.S3StorageProvider)
|
assert isinstance(Storage, provider.S3StorageProvider)
|
||||||
|
Storage = provider.get_storage_provider("gcs")
|
||||||
|
assert isinstance(Storage, provider.GCSStorageProvider)
|
||||||
with pytest.raises(RuntimeError):
|
with pytest.raises(RuntimeError):
|
||||||
provider.get_storage_provider("invalid")
|
provider.get_storage_provider("invalid")
|
||||||
|
|
||||||
@ -42,6 +47,7 @@ def test_class_instantiation():
|
|||||||
Test()
|
Test()
|
||||||
provider.LocalStorageProvider()
|
provider.LocalStorageProvider()
|
||||||
provider.S3StorageProvider()
|
provider.S3StorageProvider()
|
||||||
|
provider.GCSStorageProvider()
|
||||||
|
|
||||||
|
|
||||||
class TestLocalStorageProvider:
|
class TestLocalStorageProvider:
|
||||||
@ -175,3 +181,91 @@ class TestS3StorageProvider:
|
|||||||
self.Storage.delete_all_files()
|
self.Storage.delete_all_files()
|
||||||
assert not (upload_dir / self.filename).exists()
|
assert not (upload_dir / self.filename).exists()
|
||||||
assert not (upload_dir / self.filename_extra).exists()
|
assert not (upload_dir / self.filename_extra).exists()
|
||||||
|
|
||||||
|
class TestGCSStorageProvider:
    """Tests for GCSStorageProvider against a local in-memory GCS emulator."""

    Storage = provider.GCSStorageProvider()
    Storage.bucket_name = "my-bucket"
    file_content = b"test content"
    filename = "test.txt"
    filename_extra = "test_exyta.txt"
    file_bytesio_empty = io.BytesIO()

    @pytest.fixture(scope="class")
    def setup(self):
        """Start the GCS emulator, create the bucket, and wire the provider."""
        host, port = "localhost", 9023

        server = create_server(host, port, in_memory=True)
        server.start()
        # The google-cloud-storage client honors this env var and talks to
        # the emulator instead of the real service.
        os.environ["STORAGE_EMULATOR_HOST"] = f"http://{host}:{port}"

        gcs_client = storage.Client()
        bucket = gcs_client.bucket(self.Storage.bucket_name)
        bucket.create()
        self.Storage.gcs_client, self.Storage.bucket = gcs_client, bucket
        yield
        bucket.delete(force=True)
        server.stop()

    def test_upload_file(self, monkeypatch, tmp_path, setup):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        # Uploading without a bucket must fail. The original code called
        # `monkeypatch(...)` — the fixture is not callable, so the TypeError
        # satisfied pytest.raises without ever exercising the upload path.
        # monkeypatch.context() restores the real bucket on exit.
        with pytest.raises(Exception):
            with monkeypatch.context() as m:
                m.setattr(self.Storage, "bucket", None)
                self.Storage.upload_file(
                    io.BytesIO(self.file_content), self.filename
                )
        contents, gcs_file_path = self.Storage.upload_file(
            io.BytesIO(self.file_content), self.filename
        )
        blob = self.Storage.bucket.get_blob(self.filename)
        assert self.file_content == blob.download_as_bytes()
        # local checks
        assert (upload_dir / self.filename).exists()
        assert (upload_dir / self.filename).read_bytes() == self.file_content
        assert contents == self.file_content
        assert gcs_file_path == "gs://" + self.Storage.bucket_name + "/" + self.filename
        # test error if file is empty
        with pytest.raises(ValueError):
            self.Storage.upload_file(self.file_bytesio_empty, self.filename)

    def test_get_file(self, monkeypatch, tmp_path, setup):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        contents, gcs_file_path = self.Storage.upload_file(
            io.BytesIO(self.file_content), self.filename
        )
        file_path = self.Storage.get_file(gcs_file_path)
        assert file_path == str(upload_dir / self.filename)
        assert (upload_dir / self.filename).exists()

    def test_delete_file(self, monkeypatch, tmp_path, setup):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        contents, gcs_file_path = self.Storage.upload_file(
            io.BytesIO(self.file_content), self.filename
        )
        # ensure that local directory has the uploaded file as well
        assert (upload_dir / self.filename).exists()
        assert self.Storage.bucket.get_blob(self.filename).name == self.filename
        self.Storage.delete_file(gcs_file_path)
        # check that deleting file from gcs will delete the local file as well
        assert not (upload_dir / self.filename).exists()
        assert self.Storage.bucket.get_blob(self.filename) is None

    def test_delete_all_files(self, monkeypatch, tmp_path, setup):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        # create 2 files
        self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
        blob = self.Storage.bucket.get_blob(self.filename)
        assert (upload_dir / self.filename).exists()
        assert (upload_dir / self.filename).read_bytes() == self.file_content
        assert self.Storage.bucket.get_blob(self.filename).name == self.filename
        assert self.file_content == blob.download_as_bytes()
        self.Storage.upload_file(io.BytesIO(self.file_content), self.filename_extra)
        blob = self.Storage.bucket.get_blob(self.filename_extra)
        assert (upload_dir / self.filename_extra).exists()
        assert (upload_dir / self.filename_extra).read_bytes() == self.file_content
        assert self.Storage.bucket.get_blob(self.filename_extra).name == self.filename_extra
        assert self.file_content == blob.download_as_bytes()

        self.Storage.delete_all_files()
        assert not (upload_dir / self.filename).exists()
        assert not (upload_dir / self.filename_extra).exists()
        assert self.Storage.bucket.get_blob(self.filename) is None
        assert self.Storage.bucket.get_blob(self.filename_extra) is None
|
@ -102,6 +102,7 @@ pytest~=8.3.2
|
|||||||
pytest-docker~=3.1.1
|
pytest-docker~=3.1.1
|
||||||
|
|
||||||
googleapis-common-protos==1.63.2
|
googleapis-common-protos==1.63.2
|
||||||
|
google-cloud-storage==2.19.0
|
||||||
|
|
||||||
## LDAP
|
## LDAP
|
||||||
ldap3==2.9.1
|
ldap3==2.9.1
|
||||||
|
@ -102,8 +102,9 @@ dependencies = [
|
|||||||
"moto[s3]>=5.0.26",
|
"moto[s3]>=5.0.26",
|
||||||
|
|
||||||
"googleapis-common-protos==1.63.2",
|
"googleapis-common-protos==1.63.2",
|
||||||
|
"ldap3==2.9.1",
|
||||||
"ldap3==2.9.1"
|
"google-cloud-storage==2.19.0",
|
||||||
|
"gcp-storage-emulator>=2024.8.3",
|
||||||
]
|
]
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">= 3.11, < 3.13.0a1"
|
requires-python = ">= 3.11, < 3.13.0a1"
|
||||||
|
Loading…
Reference in New Issue
Block a user