diff --git a/CHANGELOG.md b/CHANGELOG.md index 87363d2d2..478c1668c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,26 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.119] - 2024-04-16 + +### Added + +- **🌟 Enhanced RAG Embedding Support**: Ollama and OpenAI models can now be used as the RAG embedding model. +- **🔄 Seamless Integration**: Copy 'ollama run <model name>' directly from the Ollama page to easily select and pull models. +- **🏷️ Tagging Feature**: Add tags to chats directly via the sidebar chat menu. +- **📱 Mobile Accessibility**: Swipe left and right on mobile to effortlessly open and close the sidebar. +- **🔍 Improved Navigation**: Admin panel now supports pagination for the user list. +- **🌍 Additional Language Support**: Added Polish language support. + +### Fixed + +- **🌍 Language Enhancements**: Vietnamese and Spanish translations have been improved. +- **🔧 Helm Fixes**: Resolved issues with Helm trailing slash and manifest.json. + +### Changed + +- **🐳 Docker Optimization**: Updated docker image build process to utilize 'uv' for significantly faster builds compared to 'pip3'. 
+ ## [0.1.118] - 2024-04-10 ### Added diff --git a/Dockerfile b/Dockerfile index 6cf8ded28..f19952909 100644 --- a/Dockerfile +++ b/Dockerfile @@ -93,15 +93,16 @@ RUN if [ "$USE_OLLAMA" = "true" ]; then \ # install python dependencies COPY ./backend/requirements.txt ./requirements.txt -RUN if [ "$USE_CUDA" = "true" ]; then \ +RUN pip3 install uv && \ + if [ "$USE_CUDA" = "true" ]; then \ # If you use CUDA the whisper and embedding model will be downloaded on first use pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/$USE_CUDA_DOCKER_VER --no-cache-dir && \ - pip3 install -r requirements.txt --no-cache-dir && \ + uv pip install --system -r requirements.txt --no-cache-dir && \ python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ else \ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir && \ - pip3 install -r requirements.txt --no-cache-dir && \ + uv pip install --system -r requirements.txt --no-cache-dir && \ python -c "import os; from faster_whisper import WhisperModel; WhisperModel(os.environ['WHISPER_MODEL'], device='cpu', compute_type='int8', download_root=os.environ['WHISPER_MODEL_DIR'])" && \ python -c "import os; from chromadb.utils import embedding_functions; sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(model_name=os.environ['RAG_EMBEDDING_MODEL'], device='cpu')"; \ fi diff --git a/backend/apps/audio/main.py b/backend/apps/audio/main.py index 02d1f5e8e..f93b50f6e 100644 --- a/backend/apps/audio/main.py +++ b/backend/apps/audio/main.py @@ -28,6 +28,7 @@ from config import ( 
UPLOAD_DIR, WHISPER_MODEL, WHISPER_MODEL_DIR, + WHISPER_MODEL_AUTO_UPDATE, DEVICE_TYPE, ) @@ -69,12 +70,24 @@ def transcribe( f.write(contents) f.close() - model = WhisperModel( - WHISPER_MODEL, - device=whisper_device_type, - compute_type="int8", - download_root=WHISPER_MODEL_DIR, - ) + whisper_kwargs = { + "model_size_or_path": WHISPER_MODEL, + "device": whisper_device_type, + "compute_type": "int8", + "download_root": WHISPER_MODEL_DIR, + "local_files_only": not WHISPER_MODEL_AUTO_UPDATE, + } + + log.debug(f"whisper_kwargs: {whisper_kwargs}") + + try: + model = WhisperModel(**whisper_kwargs) + except: + log.warning( + "WhisperModel initialization failed, attempting download with local_files_only=False" + ) + whisper_kwargs["local_files_only"] = False + model = WhisperModel(**whisper_kwargs) segments, info = model.transcribe(file_path, beam_size=5) log.info( diff --git a/backend/apps/images/main.py b/backend/apps/images/main.py index af8cbf7c5..f39984de0 100644 --- a/backend/apps/images/main.py +++ b/backend/apps/images/main.py @@ -29,7 +29,13 @@ import base64 import json import logging -from config import SRC_LOG_LEVELS, CACHE_DIR, AUTOMATIC1111_BASE_URL, COMFYUI_BASE_URL +from config import ( + SRC_LOG_LEVELS, + CACHE_DIR, + ENABLE_IMAGE_GENERATION, + AUTOMATIC1111_BASE_URL, + COMFYUI_BASE_URL, +) log = logging.getLogger(__name__) @@ -48,7 +54,7 @@ app.add_middleware( ) app.state.ENGINE = "" -app.state.ENABLED = False +app.state.ENABLED = ENABLE_IMAGE_GENERATION app.state.OPENAI_API_KEY = "" app.state.MODEL = "" diff --git a/backend/apps/ollama/main.py b/backend/apps/ollama/main.py index 5e19a8e36..9258efa66 100644 --- a/backend/apps/ollama/main.py +++ b/backend/apps/ollama/main.py @@ -612,8 +612,13 @@ async def generate_embeddings( user=Depends(get_current_user), ): if url_idx == None: - if form_data.model in app.state.MODELS: - url_idx = random.choice(app.state.MODELS[form_data.model]["urls"]) + model = form_data.model + + if ":" not in model: + model = 
f"{model}:latest" + + if model in app.state.MODELS: + url_idx = random.choice(app.state.MODELS[model]["urls"]) else: raise HTTPException( status_code=400, @@ -649,6 +654,60 @@ async def generate_embeddings( ) +def generate_ollama_embeddings( + form_data: GenerateEmbeddingsForm, + url_idx: Optional[int] = None, +): + + log.info(f"generate_ollama_embeddings {form_data}") + + if url_idx == None: + model = form_data.model + + if ":" not in model: + model = f"{model}:latest" + + if model in app.state.MODELS: + url_idx = random.choice(app.state.MODELS[model]["urls"]) + else: + raise HTTPException( + status_code=400, + detail=ERROR_MESSAGES.MODEL_NOT_FOUND(form_data.model), + ) + + url = app.state.OLLAMA_BASE_URLS[url_idx] + log.info(f"url: {url}") + + try: + r = requests.request( + method="POST", + url=f"{url}/api/embeddings", + data=form_data.model_dump_json(exclude_none=True).encode(), + ) + r.raise_for_status() + + data = r.json() + + log.info(f"generate_ollama_embeddings {data}") + + if "embedding" in data: + return data["embedding"] + else: + raise "Something went wrong :/" + except Exception as e: + log.exception(e) + error_detail = "Open WebUI: Server Connection Error" + if r is not None: + try: + res = r.json() + if "error" in res: + error_detail = f"Ollama: {res['error']}" + except: + error_detail = f"Ollama: {e}" + + raise error_detail + + class GenerateCompletionForm(BaseModel): model: str prompt: str @@ -672,8 +731,13 @@ async def generate_completion( ): if url_idx == None: - if form_data.model in app.state.MODELS: - url_idx = random.choice(app.state.MODELS[form_data.model]["urls"]) + model = form_data.model + + if ":" not in model: + model = f"{model}:latest" + + if model in app.state.MODELS: + url_idx = random.choice(app.state.MODELS[model]["urls"]) else: raise HTTPException( status_code=400, @@ -770,8 +834,13 @@ async def generate_chat_completion( ): if url_idx == None: - if form_data.model in app.state.MODELS: - url_idx = 
random.choice(app.state.MODELS[form_data.model]["urls"]) + model = form_data.model + + if ":" not in model: + model = f"{model}:latest" + + if model in app.state.MODELS: + url_idx = random.choice(app.state.MODELS[model]["urls"]) else: raise HTTPException( status_code=400, @@ -874,8 +943,13 @@ async def generate_openai_chat_completion( ): if url_idx == None: - if form_data.model in app.state.MODELS: - url_idx = random.choice(app.state.MODELS[form_data.model]["urls"]) + model = form_data.model + + if ":" not in model: + model = f"{model}:latest" + + if model in app.state.MODELS: + url_idx = random.choice(app.state.MODELS[model]["urls"]) else: raise HTTPException( status_code=400, diff --git a/backend/apps/rag/main.py b/backend/apps/rag/main.py index f03aa4b7f..5e9564f7d 100644 --- a/backend/apps/rag/main.py +++ b/backend/apps/rag/main.py @@ -39,13 +39,22 @@ import uuid import json +from apps.ollama.main import generate_ollama_embeddings, GenerateEmbeddingsForm + from apps.web.models.documents import ( Documents, DocumentForm, DocumentResponse, ) -from apps.rag.utils import query_doc, query_collection, get_embedding_model_path +from apps.rag.utils import ( + query_doc, + query_embeddings_doc, + query_collection, + query_embeddings_collection, + get_embedding_model_path, + generate_openai_embeddings, +) from utils.misc import ( calculate_sha256, @@ -58,6 +67,7 @@ from config import ( SRC_LOG_LEVELS, UPLOAD_DIR, DOCS_DIR, + RAG_EMBEDDING_ENGINE, RAG_EMBEDDING_MODEL, RAG_EMBEDDING_MODEL_AUTO_UPDATE, DEVICE_TYPE, @@ -74,16 +84,21 @@ log.setLevel(SRC_LOG_LEVELS["RAG"]) app = FastAPI() -app.state.PDF_EXTRACT_IMAGES = False -app.state.CHUNK_SIZE = CHUNK_SIZE -app.state.CHUNK_OVERLAP = CHUNK_OVERLAP -app.state.RAG_TEMPLATE = RAG_TEMPLATE - - -app.state.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL - app.state.TOP_K = 4 +app.state.CHUNK_SIZE = CHUNK_SIZE +app.state.CHUNK_OVERLAP = CHUNK_OVERLAP + + +app.state.RAG_EMBEDDING_ENGINE = RAG_EMBEDDING_ENGINE 
+app.state.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL +app.state.RAG_TEMPLATE = RAG_TEMPLATE + +app.state.RAG_OPENAI_API_BASE_URL = "https://api.openai.com" +app.state.RAG_OPENAI_API_KEY = "" + +app.state.PDF_EXTRACT_IMAGES = False + app.state.sentence_transformer_ef = ( embedding_functions.SentenceTransformerEmbeddingFunction( @@ -121,45 +136,72 @@ async def get_status(): "chunk_size": app.state.CHUNK_SIZE, "chunk_overlap": app.state.CHUNK_OVERLAP, "template": app.state.RAG_TEMPLATE, + "embedding_engine": app.state.RAG_EMBEDDING_ENGINE, "embedding_model": app.state.RAG_EMBEDDING_MODEL, } -@app.get("/embedding/model") -async def get_embedding_model(user=Depends(get_admin_user)): +@app.get("/embedding") +async def get_embedding_config(user=Depends(get_admin_user)): return { "status": True, + "embedding_engine": app.state.RAG_EMBEDDING_ENGINE, "embedding_model": app.state.RAG_EMBEDDING_MODEL, + "openai_config": { + "url": app.state.RAG_OPENAI_API_BASE_URL, + "key": app.state.RAG_OPENAI_API_KEY, + }, } +class OpenAIConfigForm(BaseModel): + url: str + key: str + + class EmbeddingModelUpdateForm(BaseModel): + openai_config: Optional[OpenAIConfigForm] = None + embedding_engine: str embedding_model: str -@app.post("/embedding/model/update") -async def update_embedding_model( +@app.post("/embedding/update") +async def update_embedding_config( form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user) ): - log.info( f"Updating embedding model: {app.state.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}" ) - try: - sentence_transformer_ef = ( - embedding_functions.SentenceTransformerEmbeddingFunction( - model_name=get_embedding_model_path(form_data.embedding_model, True), - device=DEVICE_TYPE, - ) - ) + app.state.RAG_EMBEDDING_ENGINE = form_data.embedding_engine - app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model - app.state.sentence_transformer_ef = sentence_transformer_ef + if app.state.RAG_EMBEDDING_ENGINE in ["ollama", "openai"]: + 
app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model + app.state.sentence_transformer_ef = None + + if form_data.openai_config != None: + app.state.RAG_OPENAI_API_BASE_URL = form_data.openai_config.url + app.state.RAG_OPENAI_API_KEY = form_data.openai_config.key + else: + sentence_transformer_ef = ( + embedding_functions.SentenceTransformerEmbeddingFunction( + model_name=get_embedding_model_path( + form_data.embedding_model, True + ), + device=DEVICE_TYPE, + ) + ) + app.state.RAG_EMBEDDING_MODEL = form_data.embedding_model + app.state.sentence_transformer_ef = sentence_transformer_ef return { "status": True, + "embedding_engine": app.state.RAG_EMBEDDING_ENGINE, "embedding_model": app.state.RAG_EMBEDDING_MODEL, + "openai_config": { + "url": app.state.RAG_OPENAI_API_BASE_URL, + "key": app.state.RAG_OPENAI_API_KEY, + }, } except Exception as e: @@ -252,12 +294,37 @@ def query_doc_handler( ): try: - return query_doc( - collection_name=form_data.collection_name, - query=form_data.query, - k=form_data.k if form_data.k else app.state.TOP_K, - embedding_function=app.state.sentence_transformer_ef, - ) + if app.state.RAG_EMBEDDING_ENGINE == "": + return query_doc( + collection_name=form_data.collection_name, + query=form_data.query, + k=form_data.k if form_data.k else app.state.TOP_K, + embedding_function=app.state.sentence_transformer_ef, + ) + else: + if app.state.RAG_EMBEDDING_ENGINE == "ollama": + query_embeddings = generate_ollama_embeddings( + GenerateEmbeddingsForm( + **{ + "model": app.state.RAG_EMBEDDING_MODEL, + "prompt": form_data.query, + } + ) + ) + elif app.state.RAG_EMBEDDING_ENGINE == "openai": + query_embeddings = generate_openai_embeddings( + model=app.state.RAG_EMBEDDING_MODEL, + text=form_data.query, + key=app.state.RAG_OPENAI_API_KEY, + url=app.state.RAG_OPENAI_API_BASE_URL, + ) + + return query_embeddings_doc( + collection_name=form_data.collection_name, + query_embeddings=query_embeddings, + k=form_data.k if form_data.k else app.state.TOP_K, + ) + 
except Exception as e: log.exception(e) raise HTTPException( @@ -277,12 +344,45 @@ def query_collection_handler( form_data: QueryCollectionsForm, user=Depends(get_current_user), ): - return query_collection( - collection_names=form_data.collection_names, - query=form_data.query, - k=form_data.k if form_data.k else app.state.TOP_K, - embedding_function=app.state.sentence_transformer_ef, - ) + try: + if app.state.RAG_EMBEDDING_ENGINE == "": + return query_collection( + collection_names=form_data.collection_names, + query=form_data.query, + k=form_data.k if form_data.k else app.state.TOP_K, + embedding_function=app.state.sentence_transformer_ef, + ) + else: + + if app.state.RAG_EMBEDDING_ENGINE == "ollama": + query_embeddings = generate_ollama_embeddings( + GenerateEmbeddingsForm( + **{ + "model": app.state.RAG_EMBEDDING_MODEL, + "prompt": form_data.query, + } + ) + ) + elif app.state.RAG_EMBEDDING_ENGINE == "openai": + query_embeddings = generate_openai_embeddings( + model=app.state.RAG_EMBEDDING_MODEL, + text=form_data.query, + key=app.state.RAG_OPENAI_API_KEY, + url=app.state.RAG_OPENAI_API_BASE_URL, + ) + + return query_embeddings_collection( + collection_names=form_data.collection_names, + query_embeddings=query_embeddings, + k=form_data.k if form_data.k else app.state.TOP_K, + ) + + except Exception as e: + log.exception(e) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT(e), + ) @app.post("/web") @@ -317,9 +417,11 @@ def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> b chunk_overlap=app.state.CHUNK_OVERLAP, add_start_index=True, ) + docs = text_splitter.split_documents(data) if len(docs) > 0: + log.info(f"store_data_in_vector_db {docs}") return store_docs_in_vector_db(docs, collection_name, overwrite), None else: raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT) @@ -338,6 +440,7 @@ def store_text_in_vector_db( def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> 
bool: + log.info(f"store_docs_in_vector_db {docs} {collection_name}") texts = [doc.page_content for doc in docs] metadatas = [doc.metadata for doc in docs] @@ -349,18 +452,52 @@ def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> b log.info(f"deleting existing collection {collection_name}") CHROMA_CLIENT.delete_collection(name=collection_name) - collection = CHROMA_CLIENT.create_collection( - name=collection_name, - embedding_function=app.state.sentence_transformer_ef, - ) + if app.state.RAG_EMBEDDING_ENGINE == "": - for batch in create_batches( - api=CHROMA_CLIENT, - ids=[str(uuid.uuid1()) for _ in texts], - metadatas=metadatas, - documents=texts, - ): - collection.add(*batch) + collection = CHROMA_CLIENT.create_collection( + name=collection_name, + embedding_function=app.state.sentence_transformer_ef, + ) + + for batch in create_batches( + api=CHROMA_CLIENT, + ids=[str(uuid.uuid1()) for _ in texts], + metadatas=metadatas, + documents=texts, + ): + collection.add(*batch) + + else: + collection = CHROMA_CLIENT.create_collection(name=collection_name) + + if app.state.RAG_EMBEDDING_ENGINE == "ollama": + embeddings = [ + generate_ollama_embeddings( + GenerateEmbeddingsForm( + **{"model": app.state.RAG_EMBEDDING_MODEL, "prompt": text} + ) + ) + for text in texts + ] + elif app.state.RAG_EMBEDDING_ENGINE == "openai": + embeddings = [ + generate_openai_embeddings( + model=app.state.RAG_EMBEDDING_MODEL, + text=text, + key=app.state.RAG_OPENAI_API_KEY, + url=app.state.RAG_OPENAI_API_BASE_URL, + ) + for text in texts + ] + + for batch in create_batches( + api=CHROMA_CLIENT, + ids=[str(uuid.uuid1()) for _ in texts], + metadatas=metadatas, + embeddings=embeddings, + documents=texts, + ): + collection.add(*batch) return True except Exception as e: diff --git a/backend/apps/rag/utils.py b/backend/apps/rag/utils.py index 7bbfe0b88..daea36863 100644 --- a/backend/apps/rag/utils.py +++ b/backend/apps/rag/utils.py @@ -2,10 +2,16 @@ import os import re 
import logging from typing import List +import requests + + from huggingface_hub import snapshot_download +from apps.ollama.main import generate_ollama_embeddings, GenerateEmbeddingsForm + from config import SRC_LOG_LEVELS, CHROMA_CLIENT + log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["RAG"]) @@ -26,6 +32,24 @@ def query_doc(collection_name: str, query: str, k: int, embedding_function): raise e +def query_embeddings_doc(collection_name: str, query_embeddings, k: int): + try: + # if you use docker use the model from the environment variable + log.info(f"query_embeddings_doc {query_embeddings}") + collection = CHROMA_CLIENT.get_collection( + name=collection_name, + ) + result = collection.query( + query_embeddings=[query_embeddings], + n_results=k, + ) + + log.info(f"query_embeddings_doc:result {result}") + return result + except Exception as e: + raise e + + def merge_and_sort_query_results(query_results, k): # Initialize lists to store combined data combined_ids = [] @@ -96,14 +120,46 @@ def query_collection( return merge_and_sort_query_results(results, k) +def query_embeddings_collection(collection_names: List[str], query_embeddings, k: int): + + results = [] + log.info(f"query_embeddings_collection {query_embeddings}") + + for collection_name in collection_names: + try: + collection = CHROMA_CLIENT.get_collection(name=collection_name) + + result = collection.query( + query_embeddings=[query_embeddings], + n_results=k, + ) + results.append(result) + except: + pass + + return merge_and_sort_query_results(results, k) + + def rag_template(template: str, context: str, query: str): template = template.replace("[context]", context) template = template.replace("[query]", query) return template -def rag_messages(docs, messages, template, k, embedding_function): - log.debug(f"docs: {docs}") +def rag_messages( + docs, + messages, + template, + k, + embedding_engine, + embedding_model, + embedding_function, + openai_key, + openai_url, +): + log.debug( + 
f"docs: {docs} {messages} {embedding_engine} {embedding_model} {embedding_function} {openai_key} {openai_url}" + ) last_user_message_idx = None for i in range(len(messages) - 1, -1, -1): @@ -136,22 +192,57 @@ def rag_messages(docs, messages, template, k, embedding_function): context = None try: - if doc["type"] == "collection": - context = query_collection( - collection_names=doc["collection_names"], - query=query, - k=k, - embedding_function=embedding_function, - ) - elif doc["type"] == "text": + + if doc["type"] == "text": context = doc["content"] else: - context = query_doc( - collection_name=doc["collection_name"], - query=query, - k=k, - embedding_function=embedding_function, - ) + if embedding_engine == "": + if doc["type"] == "collection": + context = query_collection( + collection_names=doc["collection_names"], + query=query, + k=k, + embedding_function=embedding_function, + ) + else: + context = query_doc( + collection_name=doc["collection_name"], + query=query, + k=k, + embedding_function=embedding_function, + ) + + else: + if embedding_engine == "ollama": + query_embeddings = generate_ollama_embeddings( + GenerateEmbeddingsForm( + **{ + "model": embedding_model, + "prompt": query, + } + ) + ) + elif embedding_engine == "openai": + query_embeddings = generate_openai_embeddings( + model=embedding_model, + text=query, + key=openai_key, + url=openai_url, + ) + + if doc["type"] == "collection": + context = query_embeddings_collection( + collection_names=doc["collection_names"], + query_embeddings=query_embeddings, + k=k, + ) + else: + context = query_embeddings_doc( + collection_name=doc["collection_name"], + query_embeddings=query_embeddings, + k=k, + ) + except Exception as e: log.exception(e) context = None @@ -230,3 +321,26 @@ def get_embedding_model_path( except Exception as e: log.exception(f"Cannot determine embedding model snapshot path: {e}") return embedding_model + + +def generate_openai_embeddings( + model: str, text: str, key: str, url: str = 
"https://api.openai.com" +): + try: + r = requests.post( + f"{url}/v1/embeddings", + headers={ + "Content-Type": "application/json", + "Authorization": f"Bearer {key}", + }, + json={"input": text, "model": model}, + ) + r.raise_for_status() + data = r.json() + if "data" in data: + return data["data"][0]["embedding"] + else: + raise "Something went wrong :/" + except Exception as e: + print(e) + return None diff --git a/backend/config.py b/backend/config.py index 6e3cf92a9..938df9961 100644 --- a/backend/config.py +++ b/backend/config.py @@ -18,6 +18,51 @@ from secrets import token_bytes from constants import ERROR_MESSAGES +#################################### +# LOGGING +#################################### + +log_levels = ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"] + +GLOBAL_LOG_LEVEL = os.environ.get("GLOBAL_LOG_LEVEL", "").upper() +if GLOBAL_LOG_LEVEL in log_levels: + logging.basicConfig(stream=sys.stdout, level=GLOBAL_LOG_LEVEL, force=True) +else: + GLOBAL_LOG_LEVEL = "INFO" + +log = logging.getLogger(__name__) +log.info(f"GLOBAL_LOG_LEVEL: {GLOBAL_LOG_LEVEL}") + +log_sources = [ + "AUDIO", + "COMFYUI", + "CONFIG", + "DB", + "IMAGES", + "LITELLM", + "MAIN", + "MODELS", + "OLLAMA", + "OPENAI", + "RAG", + "WEBHOOK", +] + +SRC_LOG_LEVELS = {} + +for source in log_sources: + log_env_var = source + "_LOG_LEVEL" + SRC_LOG_LEVELS[source] = os.environ.get(log_env_var, "").upper() + if SRC_LOG_LEVELS[source] not in log_levels: + SRC_LOG_LEVELS[source] = GLOBAL_LOG_LEVEL + log.info(f"{log_env_var}: {SRC_LOG_LEVELS[source]}") + +log.setLevel(SRC_LOG_LEVELS["CONFIG"]) + +#################################### +# Load .env file +#################################### + try: from dotenv import load_dotenv, find_dotenv @@ -122,47 +167,6 @@ STATIC_DIR = str(Path(os.getenv("STATIC_DIR", "./static")).resolve()) shutil.copyfile(f"{FRONTEND_BUILD_DIR}/favicon.png", f"{STATIC_DIR}/favicon.png") -#################################### -# LOGGING 
-#################################### -log_levels = ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"] - -GLOBAL_LOG_LEVEL = os.environ.get("GLOBAL_LOG_LEVEL", "").upper() -if GLOBAL_LOG_LEVEL in log_levels: - logging.basicConfig(stream=sys.stdout, level=GLOBAL_LOG_LEVEL, force=True) -else: - GLOBAL_LOG_LEVEL = "INFO" - -log = logging.getLogger(__name__) -log.info(f"GLOBAL_LOG_LEVEL: {GLOBAL_LOG_LEVEL}") - -log_sources = [ - "AUDIO", - "COMFYUI", - "CONFIG", - "DB", - "IMAGES", - "LITELLM", - "MAIN", - "MODELS", - "OLLAMA", - "OPENAI", - "RAG", - "WEBHOOK", -] - -SRC_LOG_LEVELS = {} - -for source in log_sources: - log_env_var = source + "_LOG_LEVEL" - SRC_LOG_LEVELS[source] = os.environ.get(log_env_var, "").upper() - if SRC_LOG_LEVELS[source] not in log_levels: - SRC_LOG_LEVELS[source] = GLOBAL_LOG_LEVEL - log.info(f"{log_env_var}: {SRC_LOG_LEVELS[source]}") - -log.setLevel(SRC_LOG_LEVELS["CONFIG"]) - - #################################### # CUSTOM_NAME #################################### @@ -401,6 +405,9 @@ if WEBUI_AUTH and WEBUI_SECRET_KEY == "": CHROMA_DATA_PATH = f"{DATA_DIR}/vector_db" # this uses the model defined in the Dockerfile ENV variable. 
If you dont use docker or docker based deployments such as k8s, the default embedding model will be used (all-MiniLM-L6-v2) + +RAG_EMBEDDING_ENGINE = os.environ.get("RAG_EMBEDDING_ENGINE", "") + RAG_EMBEDDING_MODEL = os.environ.get("RAG_EMBEDDING_MODEL", "all-MiniLM-L6-v2") log.info(f"Embedding model set: {RAG_EMBEDDING_MODEL}"), @@ -409,7 +416,7 @@ RAG_EMBEDDING_MODEL_AUTO_UPDATE = ( ) -# device type ebbeding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance +# device type embedding models - "cpu" (default), "cuda" (nvidia gpu required) or "mps" (apple silicon) - choosing this right can lead to better performance USE_CUDA = os.environ.get("USE_CUDA_DOCKER", "false") if USE_CUDA.lower() == "true": @@ -446,11 +453,17 @@ Query: [query]""" WHISPER_MODEL = os.getenv("WHISPER_MODEL", "base") WHISPER_MODEL_DIR = os.getenv("WHISPER_MODEL_DIR", f"{CACHE_DIR}/whisper/models") +WHISPER_MODEL_AUTO_UPDATE = ( + os.environ.get("WHISPER_MODEL_AUTO_UPDATE", "").lower() == "true" +) #################################### # Images #################################### +ENABLE_IMAGE_GENERATION = ( + os.environ.get("ENABLE_IMAGE_GENERATION", "").lower() == "true" +) AUTOMATIC1111_BASE_URL = os.getenv("AUTOMATIC1111_BASE_URL", "") COMFYUI_BASE_URL = os.getenv("COMFYUI_BASE_URL", "") diff --git a/backend/main.py b/backend/main.py index d63847bc0..4b1809a25 100644 --- a/backend/main.py +++ b/backend/main.py @@ -114,7 +114,11 @@ class RAGMiddleware(BaseHTTPMiddleware): data["messages"], rag_app.state.RAG_TEMPLATE, rag_app.state.TOP_K, + rag_app.state.RAG_EMBEDDING_ENGINE, + rag_app.state.RAG_EMBEDDING_MODEL, rag_app.state.sentence_transformer_ef, + rag_app.state.RAG_OPENAI_API_KEY, + rag_app.state.RAG_OPENAI_API_BASE_URL, ) del data["docs"] diff --git a/kubernetes/helm/templates/_helpers.tpl b/kubernetes/helm/templates/_helpers.tpl index 0f5145230..6233efab2 100644 --- a/kubernetes/helm/templates/_helpers.tpl 
+++ b/kubernetes/helm/templates/_helpers.tpl @@ -10,7 +10,7 @@ ollama {{- if .Values.ollama.externalHost }} {{- printf .Values.ollama.externalHost }} {{- else }} -{{- printf "http://%s.%s.svc.cluster.local:%d/" (include "ollama.name" .) (.Release.Namespace) (.Values.ollama.service.port | int) }} +{{- printf "http://%s.%s.svc.cluster.local:%d" (include "ollama.name" .) (.Release.Namespace) (.Values.ollama.service.port | int) }} {{- end }} {{- end }} diff --git a/package-lock.json b/package-lock.json index 7c117e8ac..83ec91d1c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "open-webui", - "version": "0.1.118", + "version": "0.1.119", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "open-webui", - "version": "0.1.118", + "version": "0.1.119", "dependencies": { "@sveltejs/adapter-node": "^1.3.1", "async": "^3.2.5", diff --git a/package.json b/package.json index 5f18eef2a..e88a0063b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "open-webui", - "version": "0.1.118", + "version": "0.1.119", "private": true, "scripts": { "dev": "vite dev --host", diff --git a/src/app.html b/src/app.html index f731761cf..2d1ef0d12 100644 --- a/src/app.html +++ b/src/app.html @@ -3,7 +3,7 @@ - + + + diff --git a/src/lib/components/common/Dropdown.svelte b/src/lib/components/common/Dropdown.svelte index e8284a504..eaceeb3e2 100644 --- a/src/lib/components/common/Dropdown.svelte +++ b/src/lib/components/common/Dropdown.svelte @@ -4,10 +4,12 @@ import { flyAndScale } from '$lib/utils/transitions'; + export let show = false; const dispatch = createEventDispatcher(); { dispatch('change', state); }} diff --git a/src/lib/components/common/Pagination.svelte b/src/lib/components/common/Pagination.svelte new file mode 100644 index 000000000..5ef7e7953 --- /dev/null +++ b/src/lib/components/common/Pagination.svelte @@ -0,0 +1,42 @@ + + +
+ +
+ + + +
+ {#each pages as page (page.key)} + {#if page.type === 'ellipsis'} +
...
+ {:else} + + {page.value} + + {/if} + {/each} +
+ + + +
+
+
diff --git a/src/lib/components/documents/AddDocModal.svelte b/src/lib/components/documents/AddDocModal.svelte index 22ea7b1ca..00c6bcda0 100644 --- a/src/lib/components/documents/AddDocModal.svelte +++ b/src/lib/components/documents/AddDocModal.svelte @@ -16,7 +16,6 @@ const i18n = getContext('i18n'); export let show = false; - export let selectedDoc; let uploadDocInputElement: HTMLInputElement; let inputFiles; let tags = []; diff --git a/src/lib/components/documents/Settings/General.svelte b/src/lib/components/documents/Settings/General.svelte index c94c1250b..18c501340 100644 --- a/src/lib/components/documents/Settings/General.svelte +++ b/src/lib/components/documents/Settings/General.svelte @@ -7,11 +7,11 @@ scanDocs, updateQuerySettings, resetVectorDB, - getEmbeddingModel, - updateEmbeddingModel + getEmbeddingConfig, + updateEmbeddingConfig } from '$lib/apis/rag'; - import { documents } from '$lib/stores'; + import { documents, models } from '$lib/stores'; import { onMount, getContext } from 'svelte'; import { toast } from 'svelte-sonner'; @@ -26,6 +26,12 @@ let showResetConfirm = false; + let embeddingEngine = ''; + let embeddingModel = ''; + + let openAIKey = ''; + let openAIUrl = ''; + let chunkSize = 0; let chunkOverlap = 0; let pdfExtractImages = true; @@ -35,8 +41,6 @@ k: 4 }; - let embeddingModel = ''; - const scanHandler = async () => { scanDirLoading = true; const res = await scanDocs(localStorage.token); @@ -49,7 +53,15 @@ }; const embeddingModelUpdateHandler = async () => { - if (embeddingModel.split('/').length - 1 > 1) { + if (embeddingEngine === '' && embeddingModel.split('/').length - 1 > 1) { + toast.error( + $i18n.t( + 'Model filesystem path detected. Model shortname is required for update, cannot continue.' + ) + ); + return; + } + if (embeddingEngine === 'ollama' && embeddingModel === '') { toast.error( $i18n.t( 'Model filesystem path detected. Model shortname is required for update, cannot continue.' 
@@ -58,14 +70,37 @@ return; } + if (embeddingEngine === 'openai' && embeddingModel === '') { + toast.error( + $i18n.t( + 'Model filesystem path detected. Model shortname is required for update, cannot continue.' + ) + ); + return; + } + + if ((embeddingEngine === 'openai' && openAIKey === '') || openAIUrl === '') { + toast.error($i18n.t('OpenAI URL/Key required.')); + return; + } + console.log('Update embedding model attempt:', embeddingModel); updateEmbeddingModelLoading = true; - const res = await updateEmbeddingModel(localStorage.token, { - embedding_model: embeddingModel + const res = await updateEmbeddingConfig(localStorage.token, { + embedding_engine: embeddingEngine, + embedding_model: embeddingModel, + ...(embeddingEngine === 'openai' + ? { + openai_config: { + key: openAIKey, + url: openAIUrl + } + } + : {}) }).catch(async (error) => { toast.error(error); - embeddingModel = (await getEmbeddingModel(localStorage.token)).embedding_model; + await setEmbeddingConfig(); return null; }); updateEmbeddingModelLoading = false; @@ -73,7 +108,7 @@ if (res) { console.log('embeddingModelUpdateHandler:', res); if (res.status === true) { - toast.success($i18n.t('Model {{embedding_model}} update complete!', res), { + toast.success($i18n.t('Embedding model set to "{{embedding_model}}"', res), { duration: 1000 * 10 }); } @@ -91,6 +126,18 @@ querySettings = await updateQuerySettings(localStorage.token, querySettings); }; + const setEmbeddingConfig = async () => { + const embeddingConfig = await getEmbeddingConfig(localStorage.token); + + if (embeddingConfig) { + embeddingEngine = embeddingConfig.embedding_engine; + embeddingModel = embeddingConfig.embedding_model; + + openAIKey = embeddingConfig.openai_config.key; + openAIUrl = embeddingConfig.openai_config.url; + } + }; + onMount(async () => { const res = await getRAGConfig(localStorage.token); @@ -101,7 +148,7 @@ chunkOverlap = res.chunk.chunk_overlap; } - embeddingModel = (await 
getEmbeddingModel(localStorage.token)).embedding_model; + await setEmbeddingConfig(); querySettings = await getQuerySettings(localStorage.token); }); @@ -118,81 +165,212 @@
{$i18n.t('General Settings')}
-
-
- {$i18n.t('Scan for documents from {{path}}', { path: '/data/docs' })} +
+
{$i18n.t('Embedding Model Engine')}
+
+
- -
-
-
+ {#if embeddingEngine === 'openai'} +
+ + + +
+ {/if} +
{$i18n.t('Update Embedding Model')}
-
-
- -
- +
+ {:else} +
+
+ +
+ +
+ {/if} + +
+ {$i18n.t( + 'Warning: If you update or change your embedding model, you will need to re-import all documents.' + )} +
+ +
+ +
+
+ {$i18n.t('Scan for documents from {{path}}', { path: '/data/docs' })} +
+ +
-
- {$i18n.t( - 'Warning: If you update or change your embedding model, you will need to re-import all documents.' - )} -
-
diff --git a/src/lib/components/icons/ChevronLeft.svelte b/src/lib/components/icons/ChevronLeft.svelte new file mode 100644 index 000000000..78ee64d24 --- /dev/null +++ b/src/lib/components/icons/ChevronLeft.svelte @@ -0,0 +1,15 @@ + + + + + diff --git a/src/lib/components/icons/ChevronRight.svelte b/src/lib/components/icons/ChevronRight.svelte new file mode 100644 index 000000000..7daf4a14a --- /dev/null +++ b/src/lib/components/icons/ChevronRight.svelte @@ -0,0 +1,15 @@ + + + + + diff --git a/src/lib/components/layout/Navbar.svelte b/src/lib/components/layout/Navbar.svelte index 4f3806fc5..2f9a54b54 100644 --- a/src/lib/components/layout/Navbar.svelte +++ b/src/lib/components/layout/Navbar.svelte @@ -19,10 +19,6 @@ export let chat; export let selectedModels; - export let tags = []; - export let addTag: Function; - export let deleteTag: Function; - export let showModelSelector = true; let showShareChatModal = false; @@ -85,9 +81,6 @@ downloadHandler={() => { showDownloadChatModal = !showDownloadChatModal; }} - {tags} - {deleteTag} - {addTag} >