Merge pull request #10752 from NovoNordisk-OpenSource/yvedeng/standardize-logging

refactor: replace print statements with logging
This commit is contained in:
Timothy Jaeryang Baek
2025-02-25 10:53:02 -08:00
committed by GitHub
34 changed files with 171 additions and 103 deletions

View File

@@ -1,13 +1,19 @@
import os
import logging
import torch
import numpy as np
from colbert.infra import ColBERTConfig
from colbert.modeling.checkpoint import Checkpoint
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
class ColBERT:
def __init__(self, name, **kwargs) -> None:
print("ColBERT: Loading model", name)
log.info("ColBERT: Loading model", name)
self.device = "cuda" if torch.cuda.is_available() else "cpu"
DOCKER = kwargs.get("env") == "docker"

View File

@@ -81,7 +81,7 @@ def query_doc(
return result
except Exception as e:
print(e)
log.exception(f"Error querying doc {collection_name} with limit {k}: {e}")
raise e
@@ -94,7 +94,7 @@ def get_doc(collection_name: str, user: UserModel = None):
return result
except Exception as e:
print(e)
log.exception(f"Error getting doc {collection_name}: {e}")
raise e
@@ -530,7 +530,7 @@ def generate_openai_batch_embeddings(
else:
raise "Something went wrong :/"
except Exception as e:
print(e)
log.exception(f"Error generating openai batch embeddings: {e}")
return None
@@ -564,7 +564,7 @@ def generate_ollama_batch_embeddings(
else:
raise "Something went wrong :/"
except Exception as e:
print(e)
log.exception(f"Error generating ollama batch embeddings: {e}")
return None

7
backend/open_webui/retrieval/vector/dbs/chroma.py Normal file → Executable file
View File

@@ -1,4 +1,5 @@
import chromadb
import logging
from chromadb import Settings
from chromadb.utils.batch_utils import create_batches
@@ -16,6 +17,10 @@ from open_webui.config import (
CHROMA_CLIENT_AUTH_PROVIDER,
CHROMA_CLIENT_AUTH_CREDENTIALS,
)
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
class ChromaClient:
@@ -103,7 +108,7 @@ class ChromaClient:
)
return None
except Exception as e:
print(e)
log.exception(f"Error querying collection {collection} with limit {limit}: {e}")
return None
def get(self, collection_name: str) -> Optional[GetResult]:

View File

@@ -1,7 +1,7 @@
from pymilvus import MilvusClient as Client
from pymilvus import FieldSchema, DataType
import json
import logging
from typing import Optional
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
@@ -10,6 +10,10 @@ from open_webui.config import (
MILVUS_DB,
MILVUS_TOKEN,
)
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
class MilvusClient:
@@ -168,7 +172,7 @@ class MilvusClient:
try:
# Loop until there are no more items to fetch or the desired limit is reached
while remaining > 0:
print("remaining", remaining)
log.info(f"remaining: {remaining}")
current_fetch = min(
max_limit, remaining
) # Determine how many items to fetch in this iteration
@@ -195,10 +199,10 @@ class MilvusClient:
if results_count < current_fetch:
break
print(all_results)
log.debug(all_results)
return self._result_to_get_result([all_results])
except Exception as e:
print(e)
log.exception(f"Error querying collection {collection_name} with limit {limit}: {e}")
return None
def get(self, collection_name: str) -> Optional[GetResult]:

View File

@@ -1,4 +1,5 @@
from typing import Optional, List, Dict, Any
import logging
from sqlalchemy import (
cast,
column,
@@ -24,9 +25,14 @@ from sqlalchemy.exc import NoSuchTableError
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.config import PGVECTOR_DB_URL, PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH
from open_webui.env import SRC_LOG_LEVELS
VECTOR_LENGTH = PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH
Base = declarative_base()
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
class DocumentChunk(Base):
__tablename__ = "document_chunk"
@@ -82,10 +88,10 @@ class PgvectorClient:
)
)
self.session.commit()
print("Initialization complete.")
log.info("Initialization complete.")
except Exception as e:
self.session.rollback()
print(f"Error during initialization: {e}")
log.exception(f"Error during initialization: {e}")
raise
def check_vector_length(self) -> None:
@@ -150,12 +156,12 @@ class PgvectorClient:
new_items.append(new_chunk)
self.session.bulk_save_objects(new_items)
self.session.commit()
print(
log.info(
f"Inserted {len(new_items)} items into collection '{collection_name}'."
)
except Exception as e:
self.session.rollback()
print(f"Error during insert: {e}")
log.exception(f"Error during insert: {e}")
raise
def upsert(self, collection_name: str, items: List[VectorItem]) -> None:
@@ -184,10 +190,10 @@ class PgvectorClient:
)
self.session.add(new_chunk)
self.session.commit()
print(f"Upserted {len(items)} items into collection '{collection_name}'.")
log.info(f"Upserted {len(items)} items into collection '{collection_name}'.")
except Exception as e:
self.session.rollback()
print(f"Error during upsert: {e}")
log.exception(f"Error during upsert: {e}")
raise
def search(
@@ -278,7 +284,7 @@ class PgvectorClient:
ids=ids, distances=distances, documents=documents, metadatas=metadatas
)
except Exception as e:
print(f"Error during search: {e}")
log.exception(f"Error during search: {e}")
return None
def query(
@@ -310,7 +316,7 @@ class PgvectorClient:
metadatas=metadatas,
)
except Exception as e:
print(f"Error during query: {e}")
log.exception(f"Error during query: {e}")
return None
def get(
@@ -334,7 +340,7 @@ class PgvectorClient:
return GetResult(ids=ids, documents=documents, metadatas=metadatas)
except Exception as e:
print(f"Error during get: {e}")
log.exception(f"Error during get: {e}")
return None
def delete(
@@ -356,22 +362,22 @@ class PgvectorClient:
)
deleted = query.delete(synchronize_session=False)
self.session.commit()
print(f"Deleted {deleted} items from collection '{collection_name}'.")
log.info(f"Deleted {deleted} items from collection '{collection_name}'.")
except Exception as e:
self.session.rollback()
print(f"Error during delete: {e}")
log.exception(f"Error during delete: {e}")
raise
def reset(self) -> None:
try:
deleted = self.session.query(DocumentChunk).delete()
self.session.commit()
print(
log.info(
f"Reset complete. Deleted {deleted} items from 'document_chunk' table."
)
except Exception as e:
self.session.rollback()
print(f"Error during reset: {e}")
log.exception(f"Error during reset: {e}")
raise
def close(self) -> None:
@@ -387,9 +393,9 @@ class PgvectorClient:
)
return exists
except Exception as e:
print(f"Error checking collection existence: {e}")
log.exception(f"Error checking collection existence: {e}")
return False
def delete_collection(self, collection_name: str) -> None:
self.delete(collection_name)
print(f"Collection '{collection_name}' deleted.")
log.info(f"Collection '{collection_name}' deleted.")

View File

@@ -1,4 +1,5 @@
from typing import Optional
import logging
from qdrant_client import QdrantClient as Qclient
from qdrant_client.http.models import PointStruct
@@ -6,9 +7,13 @@ from qdrant_client.models import models
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.config import QDRANT_URI, QDRANT_API_KEY
from open_webui.env import SRC_LOG_LEVELS
NO_LIMIT = 999999999
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
class QdrantClient:
def __init__(self):
@@ -49,7 +54,7 @@ class QdrantClient:
),
)
print(f"collection {collection_name_with_prefix} successfully created!")
log.info(f"collection {collection_name_with_prefix} successfully created!")
def _create_collection_if_not_exists(self, collection_name, dimension):
if not self.has_collection(collection_name=collection_name):
@@ -120,7 +125,7 @@ class QdrantClient:
)
return self._result_to_get_result(points.points)
except Exception as e:
print(e)
log.exception(f"Error querying a collection '{collection_name}': {e}")
return None
def get(self, collection_name: str) -> Optional[GetResult]: