Merge pull request #10308 from crpietschmann/azure-storage

feat: Add AzureStorageProvider for Azure Blob Storage support
This commit is contained in:
Timothy Jaeryang Baek 2025-02-18 19:32:17 -08:00 committed by GitHub
commit 5a7f9c81e3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 234 additions and 0 deletions

View File

@ -684,6 +684,10 @@ GOOGLE_APPLICATION_CREDENTIALS_JSON = os.environ.get(
"GOOGLE_APPLICATION_CREDENTIALS_JSON", None
)
# Azure Blob Storage settings, read from the environment.
# Each value is None when the corresponding variable is unset.
AZURE_STORAGE_ENDPOINT = os.environ.get("AZURE_STORAGE_ENDPOINT")
AZURE_STORAGE_CONTAINER_NAME = os.environ.get("AZURE_STORAGE_CONTAINER_NAME")
# Optional account key; the storage provider falls back to
# DefaultAzureCredential when this is not set.
AZURE_STORAGE_KEY = os.environ.get("AZURE_STORAGE_KEY")
####################################
# File Upload DIR
####################################

View File

@ -15,12 +15,18 @@ from open_webui.config import (
S3_SECRET_ACCESS_KEY,
GCS_BUCKET_NAME,
GOOGLE_APPLICATION_CREDENTIALS_JSON,
AZURE_STORAGE_ENDPOINT,
AZURE_STORAGE_CONTAINER_NAME,
AZURE_STORAGE_KEY,
STORAGE_PROVIDER,
UPLOAD_DIR,
)
from google.cloud import storage
from google.cloud.exceptions import GoogleCloudError, NotFound
from open_webui.constants import ERROR_MESSAGES
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient
from azure.core.exceptions import ResourceNotFoundError
class StorageProvider(ABC):
@ -221,6 +227,74 @@ class GCSStorageProvider(StorageProvider):
LocalStorageProvider.delete_all_files()
class AzureStorageProvider(StorageProvider):
    """Storage provider backed by an Azure Blob Storage container.

    Every operation is paired with the matching ``LocalStorageProvider``
    call, so files are kept both locally and in the configured container.
    Authentication uses ``AZURE_STORAGE_KEY`` when it is set and
    ``DefaultAzureCredential`` otherwise.
    """

    def __init__(self):
        self.endpoint = AZURE_STORAGE_ENDPOINT
        self.container_name = AZURE_STORAGE_CONTAINER_NAME
        storage_key = AZURE_STORAGE_KEY

        if storage_key:
            # Configure using the Azure Storage Account Endpoint and Key
            self.blob_service_client = BlobServiceClient(
                account_url=self.endpoint, credential=storage_key
            )
        else:
            # Configure using the Azure Storage Account Endpoint and
            # DefaultAzureCredential. If the key is not configured, the
            # DefaultAzureCredential is used to support Managed Identity
            # authentication.
            self.blob_service_client = BlobServiceClient(
                account_url=self.endpoint, credential=DefaultAzureCredential()
            )

        self.container_client = self.blob_service_client.get_container_client(
            self.container_name
        )

    def upload_file(self, file: BinaryIO, filename: str) -> Tuple[bytes, str]:
        """Store the file locally, then upload it to Azure Blob Storage.

        Returns:
            The file contents and the blob URL built as
            ``{endpoint}/{container_name}/{filename}``.

        Raises:
            RuntimeError: If the upload to Azure fails for any reason.
        """
        contents, _ = LocalStorageProvider.upload_file(file, filename)
        try:
            blob_client = self.container_client.get_blob_client(filename)
            blob_client.upload_blob(contents, overwrite=True)
            return contents, f"{self.endpoint}/{self.container_name}/{filename}"
        except Exception as e:
            raise RuntimeError(
                f"Error uploading file to Azure Blob Storage: {e}"
            ) from e

    def get_file(self, file_path: str) -> str:
        """Download a blob into ``UPLOAD_DIR`` and return the local path.

        Only the last ``/``-separated segment of ``file_path`` is used as the
        blob name and as the local file name.

        Raises:
            RuntimeError: If the blob does not exist.
        """
        try:
            filename = file_path.split("/")[-1]
            local_file_path = f"{UPLOAD_DIR}/{filename}"
            blob_client = self.container_client.get_blob_client(filename)
            # Fetch the blob BEFORE opening the destination: opening with
            # "wb" first would create an empty file (or truncate an existing
            # local copy) whenever the download fails.
            data = blob_client.download_blob().readall()
            with open(local_file_path, "wb") as download_file:
                download_file.write(data)
            return local_file_path
        except ResourceNotFoundError as e:
            raise RuntimeError(
                f"Error downloading file from Azure Blob Storage: {e}"
            ) from e

    def delete_file(self, file_path: str) -> None:
        """Delete a blob from Azure, then remove the local copy.

        Only the last ``/``-separated segment of ``file_path`` is used as the
        blob name.

        Raises:
            RuntimeError: If the blob does not exist; in that case the local
                deletion below is not reached.
        """
        try:
            filename = file_path.split("/")[-1]
            blob_client = self.container_client.get_blob_client(filename)
            blob_client.delete_blob()
        except ResourceNotFoundError as e:
            raise RuntimeError(
                f"Error deleting file from Azure Blob Storage: {e}"
            ) from e

        # Always delete from local storage
        LocalStorageProvider.delete_file(file_path)

    def delete_all_files(self) -> None:
        """Delete every blob in the container, then clear local storage.

        Raises:
            RuntimeError: If listing or deleting blobs fails for any reason.
        """
        try:
            for blob in self.container_client.list_blobs():
                self.container_client.delete_blob(blob.name)
        except Exception as e:
            raise RuntimeError(
                f"Error deleting all files from Azure Blob Storage: {e}"
            ) from e

        # Always delete from local storage
        LocalStorageProvider.delete_all_files()
def get_storage_provider(storage_provider: str):
if storage_provider == "local":
Storage = LocalStorageProvider()
@ -228,6 +302,8 @@ def get_storage_provider(storage_provider: str):
Storage = S3StorageProvider()
elif storage_provider == "gcs":
Storage = GCSStorageProvider()
elif storage_provider == "azure":
Storage = AzureStorageProvider()
else:
raise RuntimeError(f"Unsupported storage provider: {storage_provider}")
return Storage

View File

@ -7,6 +7,8 @@ from moto import mock_aws
from open_webui.storage import provider
from gcp_storage_emulator.server import create_server
from google.cloud import storage
from azure.storage.blob import BlobServiceClient, ContainerClient, BlobClient
from unittest.mock import MagicMock
def mock_upload_dir(monkeypatch, tmp_path):
@ -22,6 +24,7 @@ def test_imports():
provider.LocalStorageProvider
provider.S3StorageProvider
provider.GCSStorageProvider
provider.AzureStorageProvider
provider.Storage
@ -32,6 +35,8 @@ def test_get_storage_provider():
assert isinstance(Storage, provider.S3StorageProvider)
Storage = provider.get_storage_provider("gcs")
assert isinstance(Storage, provider.GCSStorageProvider)
Storage = provider.get_storage_provider("azure")
assert isinstance(Storage, provider.AzureStorageProvider)
with pytest.raises(RuntimeError):
provider.get_storage_provider("invalid")
@ -48,6 +53,7 @@ def test_class_instantiation():
provider.LocalStorageProvider()
provider.S3StorageProvider()
provider.GCSStorageProvider()
provider.AzureStorageProvider()
class TestLocalStorageProvider:
@ -272,3 +278,147 @@ class TestGCSStorageProvider:
assert not (upload_dir / self.filename_extra).exists()
assert self.Storage.bucket.get_blob(self.filename) == None
assert self.Storage.bucket.get_blob(self.filename_extra) == None
class TestAzureStorageProvider:
    """Tests for ``provider.AzureStorageProvider`` against mocked Azure clients.

    The class deliberately has no ``__init__``: pytest does not collect test
    classes that define a constructor.
    """

    @pytest.fixture(autouse=True)
    def setup_storage(self, monkeypatch):
        """Build a provider wired to MagicMock Azure clients for each test.

        The fixture is function-scoped (``monkeypatch`` cannot be used from a
        class-scoped fixture) and autouse, so every test gets fresh mocks and
        ``self.Storage`` is always set.
        """
        # Create mock Blob Service Client and related clients
        mock_blob_service_client = MagicMock()
        mock_container_client = MagicMock()
        mock_blob_client = MagicMock()

        # Set up return values for the mocks
        mock_blob_service_client.get_container_client.return_value = (
            mock_container_client
        )
        mock_container_client.get_blob_client.return_value = mock_blob_client

        # Patch the names where the provider module looks them up. The
        # provider imports these classes by name, so patching the
        # azure.storage.blob module attributes would not affect it.
        monkeypatch.setattr(
            provider,
            "BlobServiceClient",
            lambda *args, **kwargs: mock_blob_service_client,
        )
        monkeypatch.setattr(
            provider,
            "DefaultAzureCredential",
            lambda *args, **kwargs: MagicMock(),
        )

        self.Storage = provider.AzureStorageProvider()
        self.Storage.endpoint = "https://myaccount.blob.core.windows.net"
        self.Storage.container_name = "my-container"
        self.file_content = b"test content"
        self.filename = "test.txt"
        self.filename_extra = "test_extra.txt"
        self.file_bytesio_empty = io.BytesIO()

        # Apply mocks to the Storage instance
        self.Storage.blob_service_client = mock_blob_service_client
        self.Storage.container_client = mock_container_client

    def test_upload_file(self, monkeypatch, tmp_path):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        container_client = self.Storage.container_client
        blob_client = container_client.get_blob_client.return_value

        # Simulate an error when the container does not exist
        container_client.get_blob_client.side_effect = Exception(
            "Container does not exist"
        )
        with pytest.raises(Exception):
            self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)

        # Reset the side effect so the upload can succeed
        container_client.get_blob_client.side_effect = None
        contents, azure_file_path = self.Storage.upload_file(
            io.BytesIO(self.file_content), self.filename
        )

        # Assertions
        container_client.get_blob_client.assert_called_with(self.filename)
        blob_client.upload_blob.assert_called_once_with(
            self.file_content, overwrite=True
        )
        assert contents == self.file_content
        assert (
            azure_file_path
            == f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}"
        )
        assert (upload_dir / self.filename).exists()
        assert (upload_dir / self.filename).read_bytes() == self.file_content

        with pytest.raises(ValueError):
            self.Storage.upload_file(self.file_bytesio_empty, self.filename)

    def test_get_file(self, monkeypatch, tmp_path):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        blob_client = self.Storage.container_client.get_blob_client.return_value

        # Mock upload behavior
        self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)

        # Mock blob download behavior
        blob_client.download_blob.return_value.readall.return_value = (
            self.file_content
        )

        file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}"
        file_path = self.Storage.get_file(file_url)

        assert file_path == str(upload_dir / self.filename)
        assert (upload_dir / self.filename).exists()
        assert (upload_dir / self.filename).read_bytes() == self.file_content

    def test_delete_file(self, monkeypatch, tmp_path):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        blob_client = self.Storage.container_client.get_blob_client.return_value

        # Mock file upload
        self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)

        # Mock deletion
        blob_client.delete_blob.return_value = None

        file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}"
        self.Storage.delete_file(file_url)

        blob_client.delete_blob.assert_called_once()
        assert not (upload_dir / self.filename).exists()

    def test_delete_all_files(self, monkeypatch, tmp_path):
        upload_dir = mock_upload_dir(monkeypatch, tmp_path)
        container_client = self.Storage.container_client

        # Mock file uploads
        self.Storage.upload_file(io.BytesIO(self.file_content), self.filename)
        self.Storage.upload_file(io.BytesIO(self.file_content), self.filename_extra)

        # The provider reads ``blob.name``, so the listing must yield objects
        # with a ``name`` attribute rather than dicts. ``name`` is assigned
        # after construction because MagicMock(name=...) names the mock itself.
        listed_blobs = []
        for blob_name in (self.filename, self.filename_extra):
            listed = MagicMock()
            listed.name = blob_name
            listed_blobs.append(listed)
        container_client.list_blobs.return_value = listed_blobs
        container_client.delete_blob.return_value = None

        self.Storage.delete_all_files()

        # The provider deletes through the container client, one call per blob
        container_client.list_blobs.assert_called_once()
        container_client.delete_blob.assert_any_call(self.filename)
        container_client.delete_blob.assert_any_call(self.filename_extra)
        assert not (upload_dir / self.filename).exists()
        assert not (upload_dir / self.filename_extra).exists()

    def test_get_file_not_found(self, monkeypatch, tmp_path):
        # Redirect the upload directory so a failed download never touches
        # the real one.
        mock_upload_dir(monkeypatch, tmp_path)
        blob_client = self.Storage.container_client.get_blob_client.return_value

        file_url = f"https://myaccount.blob.core.windows.net/{self.Storage.container_name}/{self.filename}"

        # Mock behavior to raise an error for missing blobs
        blob_client.download_blob.side_effect = Exception("Blob not found")

        with pytest.raises(Exception, match="Blob not found"):
            self.Storage.get_file(file_url)

View File

@ -103,5 +103,9 @@ pytest-docker~=3.1.1
googleapis-common-protos==1.63.2
google-cloud-storage==2.19.0
azure-identity==1.20.0
azure-storage-blob==12.24.1
## LDAP
ldap3==2.9.1