diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py
index 9f88d1141..2de4de079 100644
--- a/backend/open_webui/config.py
+++ b/backend/open_webui/config.py
@@ -684,6 +684,10 @@ GOOGLE_APPLICATION_CREDENTIALS_JSON = os.environ.get(
     "GOOGLE_APPLICATION_CREDENTIALS_JSON", None
 )
 
+AZURE_STORAGE_ENDPOINT = os.environ.get("AZURE_STORAGE_ENDPOINT", None)
+AZURE_STORAGE_CONTAINER_NAME = os.environ.get("AZURE_STORAGE_CONTAINER_NAME", None)
+AZURE_STORAGE_KEY = os.environ.get("AZURE_STORAGE_KEY", None)
+
 ####################################
 # File Upload DIR
 ####################################
diff --git a/backend/open_webui/storage/provider.py b/backend/open_webui/storage/provider.py
index b03cf0a7e..160a45153 100644
--- a/backend/open_webui/storage/provider.py
+++ b/backend/open_webui/storage/provider.py
@@ -15,12 +15,18 @@ from open_webui.config import (
     S3_SECRET_ACCESS_KEY,
     GCS_BUCKET_NAME,
     GOOGLE_APPLICATION_CREDENTIALS_JSON,
+    AZURE_STORAGE_ENDPOINT,
+    AZURE_STORAGE_CONTAINER_NAME,
+    AZURE_STORAGE_KEY,
     STORAGE_PROVIDER,
     UPLOAD_DIR,
 )
 from google.cloud import storage
 from google.cloud.exceptions import GoogleCloudError, NotFound
 from open_webui.constants import ERROR_MESSAGES
+from azure.identity import DefaultAzureCredential
+from azure.storage.blob import BlobServiceClient
+from azure.core.exceptions import ResourceNotFoundError
 
 
 class StorageProvider(ABC):
@@ -221,6 +227,128 @@ class GCSStorageProvider(StorageProvider):
         LocalStorageProvider.delete_all_files()
 
 
+class AzureStorageProvider(StorageProvider):
+    """Storage provider backed by Azure Blob Storage.
+
+    Files are also written to and removed from local storage through
+    ``LocalStorageProvider``.
+    """
+
+    def __init__(self):
+        """Create the Azure blob service and container clients.
+
+        Uses ``AZURE_STORAGE_KEY`` as the credential when it is set; otherwise
+        ``DefaultAzureCredential`` is used to support Managed Identity
+        authentication.
+        """
+        self.endpoint = AZURE_STORAGE_ENDPOINT
+        self.container_name = AZURE_STORAGE_CONTAINER_NAME
+        storage_key = AZURE_STORAGE_KEY
+
+        if storage_key:
+            # Configure using the Azure Storage Account Endpoint and Key
+            self.blob_service_client = BlobServiceClient(
+                account_url=self.endpoint, credential=storage_key
+            )
+        else:
+            # Configure using the Azure Storage Account Endpoint and DefaultAzureCredential
+            # If the key is not configured, then the DefaultAzureCredential will be used to support Managed Identity authentication
+            self.blob_service_client = BlobServiceClient(
+                account_url=self.endpoint, credential=DefaultAzureCredential()
+            )
+        self.container_client = self.blob_service_client.get_container_client(
+            self.container_name
+        )
+
+    def upload_file(self, file: BinaryIO, filename: str) -> Tuple[bytes, str]:
+        """Handles uploading of the file to Azure Blob Storage.
+
+        The file is first saved through ``LocalStorageProvider``; its contents
+        are then uploaded as the blob ``filename``, overwriting any existing blob.
+
+        Returns:
+            The file contents and ``{endpoint}/{container_name}/{filename}``.
+
+        Raises:
+            RuntimeError: If the upload to Azure Blob Storage fails.
+        """
+        contents, _ = LocalStorageProvider.upload_file(file, filename)
+        try:
+            blob_client = self.container_client.get_blob_client(filename)
+            blob_client.upload_blob(contents, overwrite=True)
+            return contents, f"{self.endpoint}/{self.container_name}/{filename}"
+        except Exception as e:
+            raise RuntimeError(
+                f"Error uploading file to Azure Blob Storage: {e}"
+            ) from e
+
+    def get_file(self, file_path: str) -> str:
+        """Handles downloading of the file from Azure Blob Storage.
+
+        Only the part of ``file_path`` after the last "/" is used as the blob
+        name. The blob is written to ``UPLOAD_DIR`` and that local path is
+        returned.
+
+        Raises:
+            RuntimeError: If the blob does not exist.
+        """
+        filename = file_path.split("/")[-1]
+        local_file_path = f"{UPLOAD_DIR}/{filename}"
+        try:
+            blob_client = self.container_client.get_blob_client(filename)
+            # Download before opening the local file so that a failed download
+            # does not leave an empty file behind in UPLOAD_DIR.
+            contents = blob_client.download_blob().readall()
+        except ResourceNotFoundError as e:
+            raise RuntimeError(
+                f"Error downloading file from Azure Blob Storage: {e}"
+            ) from e
+        with open(local_file_path, "wb") as download_file:
+            download_file.write(contents)
+        return local_file_path
+
+    def delete_file(self, file_path: str) -> None:
+        """Handles deletion of the file from Azure Blob Storage.
+
+        Only the part of ``file_path`` after the last "/" is used as the blob
+        name.
+
+        Raises:
+            RuntimeError: If the blob does not exist; the local copy is then
+                left in place.
+        """
+        try:
+            filename = file_path.split("/")[-1]
+            blob_client = self.container_client.get_blob_client(filename)
+            blob_client.delete_blob()
+        except ResourceNotFoundError as e:
+            raise RuntimeError(
+                f"Error deleting file from Azure Blob Storage: {e}"
+            ) from e
+
+        # Delete the local copy once the blob has been removed
+        LocalStorageProvider.delete_file(file_path)
+
+    def delete_all_files(self) -> None:
+        """Handles deletion of all files from Azure Blob Storage.
+
+        Raises:
+            RuntimeError: If listing or deleting blobs fails; local files are
+                then left in place.
+        """
+        try:
+            blobs = self.container_client.list_blobs()
+            for blob in blobs:
+                self.container_client.delete_blob(blob.name)
+        except Exception as e:
+            raise RuntimeError(
+                f"Error deleting all files from Azure Blob Storage: {e}"
+            ) from e
+
+        # Delete the local copies once the container has been emptied
+        LocalStorageProvider.delete_all_files()
+
+
 def get_storage_provider(storage_provider: str):
     if storage_provider == "local":
         Storage = LocalStorageProvider()
@@ -228,6 +356,8 @@ def get_storage_provider(storage_provider: str):
         Storage = S3StorageProvider()
     elif storage_provider == "gcs":
         Storage = GCSStorageProvider()
+    elif storage_provider == "azure":
+        Storage = AzureStorageProvider()
     else:
         raise RuntimeError(f"Unsupported storage provider: {storage_provider}")
     return Storage
diff --git a/backend/open_webui/test/apps/webui/storage/test_provider.py b/backend/open_webui/test/apps/webui/storage/test_provider.py
index 863106e75..a5ef13504 100644
--- a/backend/open_webui/test/apps/webui/storage/test_provider.py
+++ b/backend/open_webui/test/apps/webui/storage/test_provider.py
@@ -7,6 +7,8 @@ from moto import mock_aws
 from open_webui.storage import provider
 from gcp_storage_emulator.server import create_server
 from google.cloud import storage
+from azure.storage.blob import BlobServiceClient, ContainerClient, BlobClient
+from unittest.mock import MagicMock
 
 
 def mock_upload_dir(monkeypatch, tmp_path):
@@ -22,6 +24,7 @@ def test_imports():
     provider.LocalStorageProvider
     provider.S3StorageProvider
     provider.GCSStorageProvider
+    provider.AzureStorageProvider
     provider.Storage
 
 
@@ -32,6 +35,8 @@ def test_get_storage_provider():
     assert isinstance(Storage, provider.S3StorageProvider)
     Storage = provider.get_storage_provider("gcs")
     assert isinstance(Storage, provider.GCSStorageProvider)
+    Storage = provider.get_storage_provider("azure")
+    assert isinstance(Storage, provider.AzureStorageProvider)
     with pytest.raises(RuntimeError):
         provider.get_storage_provider("invalid")
 
@@ -48,6 +53,7 @@ def test_class_instantiation():
     provider.LocalStorageProvider()
     provider.S3StorageProvider()
provider.GCSStorageProvider() + provider.AzureStorageProvider() class TestLocalStorageProvider: @@ -272,3 +278,147 @@ class TestGCSStorageProvider: assert not (upload_dir / self.filename_extra).exists() assert self.Storage.bucket.get_blob(self.filename) == None assert self.Storage.bucket.get_blob(self.filename_extra) == None + + +class TestAzureStorageProvider: + def __init__(self): + super().__init__() + + @pytest.fixture(scope="class") + def setup_storage(self, monkeypatch): + # Create mock Blob Service Client and related clients + mock_blob_service_client = MagicMock() + mock_container_client = MagicMock() + mock_blob_client = MagicMock() + + # Set up return values for the mock + mock_blob_service_client.get_container_client.return_value = ( + mock_container_client + ) + mock_container_client.get_blob_client.return_value = mock_blob_client + + # Monkeypatch the Azure classes to return our mocks + monkeypatch.setattr( + azure.storage.blob, + "BlobServiceClient", + lambda *args, **kwargs: mock_blob_service_client, + ) + monkeypatch.setattr( + azure.storage.blob, + "ContainerClient", + lambda *args, **kwargs: mock_container_client, + ) + monkeypatch.setattr( + azure.storage.blob, "BlobClient", lambda *args, **kwargs: mock_blob_client + ) + + self.Storage = provider.AzureStorageProvider() + self.Storage.endpoint = "https://myaccount.blob.core.windows.net" + self.Storage.container_name = "my-container" + self.file_content = b"test content" + self.filename = "test.txt" + self.filename_extra = "test_extra.txt" + self.file_bytesio_empty = io.BytesIO() + + # Apply mocks to the Storage instance + self.Storage.blob_service_client = mock_blob_service_client + self.Storage.container_client = mock_container_client + + def test_upload_file(self, monkeypatch, tmp_path): + upload_dir = mock_upload_dir(monkeypatch, tmp_path) + + # Simulate an error when container does not exist + self.Storage.container_client.get_blob_client.side_effect = Exception( + "Container does not exist" + 
) + with pytest.raises(Exception): + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) + + # Reset side effect and create container + self.Storage.container_client.get_blob_client.side_effect = None + self.Storage.create_container() + contents, azure_file_path = self.Storage.upload_file( + io.BytesIO(self.file_content), self.filename + ) + + # Assertions + self.Storage.container_client.get_blob_client.assert_called_with(self.filename) + self.Storage.container_client.get_blob_client().upload_blob.assert_called_once_with( + self.file_content, overwrite=True + ) + assert contents == self.file_content + assert ( + azure_file_path + == f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" + ) + assert (upload_dir / self.filename).exists() + assert (upload_dir / self.filename).read_bytes() == self.file_content + + with pytest.raises(ValueError): + self.Storage.upload_file(self.file_bytesio_empty, self.filename) + + def test_get_file(self, monkeypatch, tmp_path): + upload_dir = mock_upload_dir(monkeypatch, tmp_path) + self.Storage.create_container() + + # Mock upload behavior + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) + # Mock blob download behavior + self.Storage.container_client.get_blob_client().download_blob().readall.return_value = ( + self.file_content + ) + + file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" + file_path = self.Storage.get_file(file_url) + + assert file_path == str(upload_dir / self.filename) + assert (upload_dir / self.filename).exists() + assert (upload_dir / self.filename).read_bytes() == self.file_content + + def test_delete_file(self, monkeypatch, tmp_path): + upload_dir = mock_upload_dir(monkeypatch, tmp_path) + self.Storage.create_container() + + # Mock file upload + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) + # Mock deletion + 
self.Storage.container_client.get_blob_client().delete_blob.return_value = None + + file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" + self.Storage.delete_file(file_url) + + self.Storage.container_client.get_blob_client().delete_blob.assert_called_once() + assert not (upload_dir / self.filename).exists() + + def test_delete_all_files(self, monkeypatch, tmp_path): + upload_dir = mock_upload_dir(monkeypatch, tmp_path) + self.Storage.create_container() + + # Mock file uploads + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename) + self.Storage.upload_file(io.BytesIO(self.file_content), self.filename_extra) + + # Mock listing and deletion behavior + self.Storage.container_client.list_blobs.return_value = [ + {"name": self.filename}, + {"name": self.filename_extra}, + ] + self.Storage.container_client.get_blob_client().delete_blob.return_value = None + + self.Storage.delete_all_files() + + self.Storage.container_client.list_blobs.assert_called_once() + self.Storage.container_client.get_blob_client().delete_blob.assert_any_call() + assert not (upload_dir / self.filename).exists() + assert not (upload_dir / self.filename_extra).exists() + + def test_get_file_not_found(self, monkeypatch): + self.Storage.create_container() + + file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}" + # Mock behavior to raise an error for missing blobs + self.Storage.container_client.get_blob_client().download_blob.side_effect = ( + Exception("Blob not found") + ) + with pytest.raises(Exception, match="Blob not found"): + self.Storage.get_file(file_url) diff --git a/backend/requirements.txt b/backend/requirements.txt index 9b859b84a..f8e5f6684 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -103,5 +103,9 @@ pytest-docker~=3.1.1 googleapis-common-protos==1.63.2 google-cloud-storage==2.19.0 +azure-identity==1.20.0 +azure-storage-blob==12.24.1 + + ## LDAP 
ldap3==2.9.1