wip: retrieval

This commit is contained in:
Timothy Jaeryang Baek
2024-12-11 18:05:42 -08:00
parent 3ec0a58cd7
commit 867c4bc0d0
24 changed files with 897 additions and 546 deletions

View File

@@ -11,7 +11,7 @@ from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriev
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT
from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT
from open_webui.utils.misc import get_last_user_message
from open_webui.env import SRC_LOG_LEVELS

View File

@@ -1,22 +1,22 @@
from open_webui.config import VECTOR_DB
if VECTOR_DB == "milvus":
from open_webui.apps.retrieval.vector.dbs.milvus import MilvusClient
from open_webui.retrieval.vector.dbs.milvus import MilvusClient
VECTOR_DB_CLIENT = MilvusClient()
elif VECTOR_DB == "qdrant":
from open_webui.apps.retrieval.vector.dbs.qdrant import QdrantClient
from open_webui.retrieval.vector.dbs.qdrant import QdrantClient
VECTOR_DB_CLIENT = QdrantClient()
elif VECTOR_DB == "opensearch":
from open_webui.apps.retrieval.vector.dbs.opensearch import OpenSearchClient
from open_webui.retrieval.vector.dbs.opensearch import OpenSearchClient
VECTOR_DB_CLIENT = OpenSearchClient()
elif VECTOR_DB == "pgvector":
from open_webui.apps.retrieval.vector.dbs.pgvector import PgvectorClient
from open_webui.retrieval.vector.dbs.pgvector import PgvectorClient
VECTOR_DB_CLIENT = PgvectorClient()
else:
from open_webui.apps.retrieval.vector.dbs.chroma import ChromaClient
from open_webui.retrieval.vector.dbs.chroma import ChromaClient
VECTOR_DB_CLIENT = ChromaClient()

View File

@@ -4,7 +4,7 @@ from chromadb.utils.batch_utils import create_batches
from typing import Optional
from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.config import (
CHROMA_DATA_PATH,
CHROMA_HTTP_HOST,

View File

@@ -4,7 +4,7 @@ import json
from typing import Optional
from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.config import (
MILVUS_URI,
)

View File

@@ -1,7 +1,7 @@
from opensearchpy import OpenSearch
from typing import Optional
from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.config import (
OPENSEARCH_URI,
OPENSEARCH_SSL,

View File

@@ -18,7 +18,7 @@ from sqlalchemy.dialects.postgresql import JSONB, array
from pgvector.sqlalchemy import Vector
from sqlalchemy.ext.mutable import MutableDict
from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.config import PGVECTOR_DB_URL
VECTOR_LENGTH = 1536

View File

@@ -4,7 +4,7 @@ from qdrant_client import QdrantClient as Qclient
from qdrant_client.http.models import PointStruct
from qdrant_client.models import models
from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.config import QDRANT_URI, QDRANT_API_KEY
NO_LIMIT = 999999999

View File

@@ -3,7 +3,7 @@ import os
from pprint import pprint
from typing import Optional
import requests
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
import argparse

View File

@@ -2,7 +2,7 @@ import logging
from typing import Optional
import requests
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)

View File

@@ -1,7 +1,7 @@
import logging
from typing import Optional
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from duckduckgo_search import DDGS
from open_webui.env import SRC_LOG_LEVELS

View File

@@ -2,7 +2,7 @@ import logging
from typing import Optional
import requests
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)

View File

@@ -1,7 +1,7 @@
import logging
import requests
from open_webui.apps.retrieval.web.main import SearchResult
from open_webui.retrieval.web.main import SearchResult
from open_webui.env import SRC_LOG_LEVELS
from yarl import URL

View File

@@ -2,7 +2,7 @@ import logging
from typing import Optional
import requests
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)
@@ -31,17 +31,15 @@ def search_kagi(
response.raise_for_status()
json_response = response.json()
search_results = json_response.get("data", [])
results = [
SearchResult(
link=result["url"],
title=result["title"],
snippet=result.get("snippet")
link=result["url"], title=result["title"], snippet=result.get("snippet")
)
for result in search_results
if result["t"] == 0
]
print(results)
if filter_list:

View File

@@ -2,7 +2,7 @@ import logging
from typing import Optional
import requests
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)

View File

@@ -3,7 +3,7 @@ from typing import Optional
from urllib.parse import urlencode
import requests
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)

View File

@@ -2,7 +2,7 @@ import logging
from typing import Optional
import requests
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)

View File

@@ -3,7 +3,7 @@ import logging
from typing import Optional
import requests
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)

View File

@@ -3,7 +3,7 @@ from typing import Optional
from urllib.parse import urlencode
import requests
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)

View File

@@ -2,7 +2,7 @@ import logging
from typing import Optional
import requests
from open_webui.apps.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)

View File

@@ -1,7 +1,7 @@
import logging
import requests
from open_webui.apps.retrieval.web.main import SearchResult
from open_webui.retrieval.web.main import SearchResult
from open_webui.env import SRC_LOG_LEVELS
log = logging.getLogger(__name__)