Mirror of https://github.com/clearml/clearml-serving, synced 2025-06-26 18:16:00 +00:00
fix imports

Commit: 9bb0dbb182
Parent: 9441ae8473
@@ -615,11 +615,38 @@ class VllmEngine(Singleton):
         # load vLLM Modules
         if self._vllm is None:
-            from vllm import entrypoints, engine, usage
-            self._vllm = {}
-            self._vllm["entrypoints"] = entrypoints
-            self._vllm["engine"] = engine
-            self._vllm["usage"] = usage
+            # from vllm import entrypoints, engine, usage
+            from vllm.engine.arg_utils import AsyncEngineArgs
+            from vllm.engine.async_llm_engine import AsyncLLMEngine
+            from vllm.entrypoints.logger import RequestLogger
+            from vllm.entrypoints.openai.serving_engine import OpenAIServing
+            from vllm.entrypoints.openai.serving_models import OpenAIServingModels, LoRAModulePath, PromptAdapterPath, BaseModelPath
+            from vllm.entrypoints.openai.serving_chat import OpenAIServingChat
+            from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion
+            from vllm.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
+            from vllm.entrypoints.openai.serving_tokenization import OpenAIServingTokenization
+            from vllm.entrypoints.openai.protocol import ChatCompletionResponse, CompletionResponse, ErrorResponse
+            from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
+            from vllm.usage.usage_lib import UsageContext
+            self._vllm = {
+                "AsyncEngineArgs": AsyncEngineArgs,
+                "AsyncLLMEngine": AsyncLLMEngine,
+                "RequestLogger": RequestLogger,
+                "OpenAIServing": OpenAIServing,
+                "OpenAIServingModels": OpenAIServingModels,
+                "LoRAModulePath": LoRAModulePath,
+                "PromptAdapterPath": PromptAdapterPath,
+                "BaseModelPath": BaseModelPath,
+                "OpenAIServingChat": OpenAIServingChat,
+                "OpenAIServingCompletion": OpenAIServingCompletion,
+                "OpenAIServingEmbedding": OpenAIServingEmbedding,
+                "OpenAIServingTokenization": OpenAIServingTokenization,
+                "ChatCompletionResponse": ChatCompletionResponse,
+                "CompletionResponse": CompletionResponse,
+                "ErrorResponse": ErrorResponse,
+                "ChatTemplateContentFormatOption": ChatTemplateContentFormatOption,
+                "UsageContext": UsageContext
+            }

         if self._fastapi is None:
             from fastapi.responses import JSONResponse, StreamingResponse
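Note: the hunk above replaces the old module-handle registry (resolving classes through dotted paths such as self._vllm["engine"].arg_utils.AsyncEngineArgs) with a flat dict keyed by class name. A minimal standalone sketch of that lazy-import registry pattern follows; the LazyVllmRegistry name and the build_engine helper are illustrative, not part of this commit:

# Sketch of the lazy-import registry pattern introduced above.
# LazyVllmRegistry and build_engine are illustrative names only.
class LazyVllmRegistry:
    def __init__(self):
        self._vllm = None

    def _load(self):
        # Import vLLM only on first use, then resolve classes by name
        # instead of dotted attribute paths.
        if self._vllm is None:
            from vllm.engine.arg_utils import AsyncEngineArgs
            from vllm.engine.async_llm_engine import AsyncLLMEngine
            self._vllm = {
                "AsyncEngineArgs": AsyncEngineArgs,
                "AsyncLLMEngine": AsyncLLMEngine,
            }
        return self._vllm

    def build_engine(self, **engine_kwargs):
        vllm = self._load()
        engine_args = vllm["AsyncEngineArgs"](**engine_kwargs)
        return vllm["AsyncLLMEngine"].from_engine_args(engine_args)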
@@ -647,85 +674,75 @@ class VllmEngine(Singleton):
             self.add_models(name=name, model_path=model_path)
             return None

-        vllm_engine_config = json.loads(os.environ.get("VLLM_ENGINE_ARGS"))
-        engine_args = self._vllm["engine"].arg_utils.AsyncEngineArgs(**vllm_engine_config)
-        async_engine_client = self._vllm["engine"].async_llm_engine.AsyncLLMEngine.from_engine_args(
+        vllm_engine_config = json.loads(os.environ.get("VLLM_ENGINE_ARGS").replace("'", ""))
+        vllm_engine_config["model"] = model_path
+        vllm_engine_config["served_model_name"] = name
+        engine_args = self._vllm["AsyncEngineArgs"](**vllm_engine_config)
+        async_engine_client = self._vllm["AsyncLLMEngine"].from_engine_args(
             engine_args,
-            usage_context=self._vllm["usage"].usage_lib.UsageContext.OPENAI_API_SERVER
+            usage_context=self._vllm["UsageContext"].OPENAI_API_SERVER
         )
         model_config = async_engine_client.engine.get_model_config()
-        request_logger = self._vllm["entrypoints"].logger.RequestLogger(
+        request_logger = self._vllm["RequestLogger"](
             max_log_len=vllm_model_config["max_log_len"]
         )
-        self._model["openai_serving_models"] = self._vllm[
-            "entrypoints"
-        ].openai.serving_models.OpenAIServingModels(
-            async_engine_client,
-            model_config,
-            [
-                self._vllm["entrypoints"].openai.serving_models.BaseModelPath(
-                    name=name,
-                    model_path=model_path
-                )
-            ],
-            lora_modules=svllm_model_config["lora_modules"],
-            prompt_adapters=vllm_model_config["prompt_adapters"],
+        self._model["openai_serving_models"] = self._vllm["OpenAIServingModels"](
+            async_engine_client,
+            model_config,
+            [
+                self._vllm["BaseModelPath"](
+                    name=name,
+                    model_path=model_path
+                )
+            ],
+            lora_modules=vllm_model_config["lora_modules"],
+            prompt_adapters=vllm_model_config["prompt_adapters"],
         )
-        await self._model["openai_serving_models"].init_static_loras()
-        self._model["openai_serving"] = self._vllm[
-            "entrypoints"
-        ].openai.serving_engine.OpenAIServing(
-            async_engine_client,
-            model_config,
-            self._model["openai_serving_models"],
-            request_logger=request_logger,
-            return_tokens_as_token_ids=vllm_model_config["return_tokens_as_token_ids"]
+        # await self._model["openai_serving_models"].init_static_loras()
+        self._model["openai_serving"] = self._vllm["OpenAIServing"](
+            async_engine_client,
+            model_config,
+            self._model["openai_serving_models"],
+            request_logger=request_logger,
+            return_tokens_as_token_ids=vllm_model_config["return_tokens_as_token_ids"]
         )
-        self._model["openai_serving_chat"] = self._vllm[
-            "entrypoints"
-        ].openai.serving_chat.OpenAIServingChat(
-            async_engine_client,
-            model_config,
-            self._model["openai_serving_models"],
-            response_role=vllm_model_config["response_role"],
-            request_logger=request_logger,
-            chat_template=vllm_model_config["chat_template"],
-            chat_template_content_format=chat_settings["chat_template_content_format"],
-            return_tokens_as_token_ids=vllm_model_config["return_tokens_as_token_ids"],
-            enable_reasoning=chat_settings["enable_reasoning"],
-            reasoning_parser=chat_settings["reasoning_parser"],
-            enable_auto_tools=chat_settings["enable_auto_tools"],
-            tool_parser=chat_settings["tool_parser"],
-            enable_prompt_tokens_details=chat_settings["enable_prompt_tokens_details"]
+        self._model["openai_serving_chat"] = self._vllm["OpenAIServingChat"](
+            async_engine_client,
+            model_config,
+            self._model["openai_serving_models"],
+            response_role=vllm_model_config["response_role"],
+            request_logger=request_logger,
+            chat_template=vllm_model_config["chat_template"],
+            chat_template_content_format=chat_settings["chat_template_content_format"],
+            return_tokens_as_token_ids=vllm_model_config["return_tokens_as_token_ids"],
+            enable_reasoning=chat_settings["enable_reasoning"],
+            reasoning_parser=chat_settings["reasoning_parser"],
+            enable_auto_tools=chat_settings["enable_auto_tools"],
+            tool_parser=chat_settings["tool_parser"],
+            enable_prompt_tokens_details=chat_settings["enable_prompt_tokens_details"]
         ) if model_config.runner_type == "generate" else None
-        self._model["openai_serving_completion"] = self._vllm[
-            "entrypoints"
-        ].openai.serving_completion.OpenAIServingCompletion(
-            async_engine_client,
-            model_config,
-            self._model["openai_serving_models"],
-            request_logger=request_logger,
-            return_tokens_as_token_ids=vllm_model_config["return_tokens_as_token_ids"]
+        self._model["openai_serving_completion"] = self._vllm["OpenAIServingCompletion"](
+            async_engine_client,
+            model_config,
+            self._model["openai_serving_models"],
+            request_logger=request_logger,
+            return_tokens_as_token_ids=vllm_model_config["return_tokens_as_token_ids"]
         ) if model_config.runner_type == "generate" else None
-        self._model["openai_serving_embedding"] = self._vllm[
-            "entrypoints"
-        ].openai.serving_embedding.OpenAIServingEmbedding(
-            async_engine_client,
-            model_config,
-            self._model["openai_serving_models"],
-            request_logger=request_logger,
-            chat_template=vllm_model_config["chat_template"],
-            chat_template_content_format=chat_settings["chat_template_content_format"]
+        self._model["openai_serving_embedding"] = self._vllm["OpenAIServingEmbedding"](
+            async_engine_client,
+            model_config,
+            self._model["openai_serving_models"],
+            request_logger=request_logger,
+            chat_template=vllm_model_config["chat_template"],
+            chat_template_content_format=chat_settings["chat_template_content_format"]
         ) if model_config.task == "embed" else None
-        self._model["openai_serving_tokenization"] = self._vllm[
-            "entrypoints"
-        ].openai.serving_tokenization.OpenAIServingTokenization(
-            async_engine_client,
-            model_config,
-            self._model["openai_serving_models"],
-            request_logger=request_logger,
-            chat_template=vllm_model_config["chat_template"],
-            chat_template_content_format=chat_settings["chat_template_content_format"]
+        self._model["openai_serving_tokenization"] = self._vllm["OpenAIServingTokenization"](
+            async_engine_client,
+            model_config,
+            self._model["openai_serving_models"],
+            request_logger=request_logger,
+            chat_template=vllm_model_config["chat_template"],
+            chat_template_content_format=chat_settings["chat_template_content_format"]
         )
         self.logger.info("vLLM Engine was successfully initialized")
         self.is_already_loaded = True
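Note: with this hunk the endpoint's model_path and name are injected into the engine config instead of being expected inside VLLM_ENGINE_ARGS, and stray single quotes around the environment value are stripped before json.loads. A small standalone sketch of the parsing step; the env value, path and name below are made up for illustration:

import json
import os

# Hypothetical value, quoted the way the docker-compose default is quoted.
os.environ.setdefault(
    "VLLM_ENGINE_ARGS",
    "'" + '{"disable_log_requests":true,"gpu_memory_utilization":0.95,"enforce_eager":true}' + "'"
)

vllm_engine_config = json.loads(os.environ.get("VLLM_ENGINE_ARGS").replace("'", ""))
vllm_engine_config["model"] = "/opt/models/my-model"       # hypothetical local path
vllm_engine_config["served_model_name"] = "my_endpoint"    # hypothetical endpoint name
print(vllm_engine_config)
# engine_args = AsyncEngineArgs(**vllm_engine_config)  # as done in the diff above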
@@ -733,7 +750,7 @@ class VllmEngine(Singleton):

     def add_models(self, name: str, model_path: str):
         self._model["openai_serving_models"].base_model_paths.append(
-            self._vllm["entrypoints"].openai.serving_models.BaseModelPath(
+            self._vllm["BaseModelPath"](
                 name=name,
                 model_path=model_path
             )
@@ -759,13 +776,12 @@ class VllmEngine(Singleton):
                 message="The model does not support Completions API"
             )
         generator = await handler.create_completion(request=request, raw_request=raw_request)
-        if isinstance(generator, self._vllm["entrypoints"].openai.protocol.ErrorResponse):
+        if isinstance(generator, self._vllm["ErrorResponse"]):
             return self._fastapi["json_response"](content=generator.model_dump(), status_code=generator.code)
-        elif isinstance(generator, self._vllm["entrypoints"].openai.protocol.CompletionResponse):
+        elif isinstance(generator, self._vllm["CompletionResponse"]):
             return self._fastapi["json_response"](content=generator.model_dump())
         return self._fastapi["streaming_response"](content=generator, media_type="text/event-stream")


     async def chat_completions(
         self,
         data: Any,
@@ -784,12 +800,20 @@ class VllmEngine(Singleton):
                 message="The model does not support Chat Completions API"
             )
         generator = await handler.create_chat_completion(request=request, raw_request=raw_request)
-        if isinstance(generator, self._vllm["entrypoints"].openai.protocol.ErrorResponse):
+        if isinstance(generator, self._vllm["ErrorResponse"]):
             return self._fastapi["json_response"](content=generator.model_dump(), status_code=generator.code)
-        elif isinstance(generator, self._vllm["entrypoints"].openai.protocol.ChatCompletionResponse):
+        elif isinstance(generator, self._vllm["ChatCompletionResponse"]):
             return self._fastapi["json_response"](content=generator.model_dump())
         return self._fastapi["streaming_response"](content=generator, media_type="text/event-stream")

+    async def models(
+        self,
+        data: Any,
+        state: dict,
+        collect_custom_statistics_fn: Callable[[dict], None] = None
+    ) -> Any:
+        pass
+

 @BasePreprocessRequest.register_engine("vllm", modules=["vllm", "fastapi"])
 class VllmPreprocessRequest(BasePreprocessRequest):
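Note: the new models() coroutine on VllmEngine is a stub for now (pass). If it is later wired up, it would presumably go through the registered OpenAIServingModels instance; the sketch below is a hypothetical fleshed-out version and assumes that class exposes an async show_available_models(), as recent vLLM releases appear to, which this commit does not itself establish:

# Hypothetical implementation of the stub above (not part of this commit).
async def models(self, data, state, collect_custom_statistics_fn=None):
    serving_models = self._model["openai_serving_models"]
    # show_available_models() is assumed here; verify against the installed vLLM version.
    model_list = await serving_models.show_available_models()
    return self._fastapi["json_response"](content=model_list.model_dump())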
@@ -881,7 +905,7 @@ class VllmPreprocessRequest(BasePreprocessRequest):
         The actual processing function.
         We run the process in this context
         """
-        return self._vllm_engine.completions(data=data, state=state, collect_custom_statistics_fn=collect_custom_statistics_fn)
+        return await self._vllm_engine.completions(data=data, state=state, collect_custom_statistics_fn=collect_custom_statistics_fn)


     async def chat_completions(self, data: Any, state: dict, collect_custom_statistics_fn: Callable[[dict], None] = None) -> Any:
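Note: the single added await above is the substantive fix. VllmEngine.completions is a coroutine function, so returning it without awaiting hands the caller a coroutine object rather than the response; the same applies to the chat_completions fix in the next hunk. A minimal, self-contained illustration of the difference (FakeEngine is a stand-in, not the ClearML implementation):

import asyncio

class FakeEngine:
    async def completions(self, data):
        return {"choices": [{"text": "ok"}]}

async def broken(engine):
    return engine.completions(data={})        # missing await: returns a coroutine object

async def fixed(engine):
    return await engine.completions(data={})  # awaited: returns the actual dict

async def main():
    engine = FakeEngine()
    result = await broken(engine)
    print(type(result))                       # <class 'coroutine'>, not a dict
    result.close()                            # avoid the "never awaited" warning
    print(await fixed(engine))                # {'choices': [{'text': 'ok'}]}

asyncio.run(main())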
@@ -889,9 +913,16 @@ class VllmPreprocessRequest(BasePreprocessRequest):
         The actual processing function.
         We run the process in this context
         """
-        return self._vllm_engine.chat_completions(data=data, state=state, collect_custom_statistics_fn=collect_custom_statistics_fn)
+        return await self._vllm_engine.chat_completions(data=data, state=state, collect_custom_statistics_fn=collect_custom_statistics_fn)


+    async def models(self, data: Any, state: dict, collect_custom_statistics_fn: Callable[[dict], None] = None) -> Any:
+        """
+        The actual processing function.
+        We run the process in this context
+        """
+        return self._vllm_engine.models(data=data, state=state, collect_custom_statistics_fn=collect_custom_statistics_fn)
+
     @staticmethod
     async def _preprocess_send_request(_, endpoint: str, version: str = None, data: dict = None) -> Optional[dict]:
         endpoint = "/openai/v1/{}".format(endpoint.strip("/"))
@@ -105,7 +105,7 @@ services:
       GOOGLE_APPLICATION_CREDENTIALS: ${GOOGLE_APPLICATION_CREDENTIALS:-}
       AZURE_STORAGE_ACCOUNT: ${AZURE_STORAGE_ACCOUNT:-}
       AZURE_STORAGE_KEY: ${AZURE_STORAGE_KEY:-}
-      VLLM_ENGINE_ARGS: ${VLLM_ENGINE_ARGS:-'{"disable_log_requests":true,"disable_log_stats":false,"gpu_memory_utilization":0.95,"quantization":null,"enforce_eager":true}'}
+      VLLM_ENGINE_ARGS: ${VLLM_ENGINE_ARGS:-'{"disable_log_requests":true,"disable_log_stats":false,"gpu_memory_utilization":0.95,"enforce_eager":true}'}
     depends_on:
       - kafka
     networks:
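Note: the compose default now omits "quantization":null, so vLLM's own default for quantization applies unless the variable is overridden. Because the default value is wrapped in literal single quotes, it only becomes valid JSON after the quote stripping shown earlier; a quick sanity check of the new default (value copied from the line above):

import json

compose_default = """'{"disable_log_requests":true,"disable_log_stats":false,"gpu_memory_utilization":0.95,"enforce_eager":true}'"""
print(json.loads(compose_default.replace("'", "")))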
@@ -11,18 +11,6 @@ class Preprocess:

     def load(self, local_file_name: str) -> Optional[Any]:  # noqa

-        # vllm_engine_config = {
-        # "model": local_file_name,
-        # "tokenizer": local_file_name,
-        # "disable_log_requests": True,
-        # "disable_log_stats": False,
-        # "gpu_memory_utilization": 0.9,
-        # "quantization": None,
-        # "enforce_eager": True,
-        # "served_model_name": "test_vllm",
-        # "dtype": "float16",
-        # "max_model_len": 8192
-        # }
         vllm_model_config = {
             "lora_modules": None,  # [LoRAModulePath(name=a, path=b)]
             "prompt_adapters": None,  # [PromptAdapterPath(name=a, path=b)]
@@ -39,66 +27,7 @@ class Preprocess:
             "enable_prompt_tokens_details": False,
             "chat_template_content_format": "auto"
         }
-        # self._model = {}
-        # engine_args = AsyncEngineArgs(**self.vllm_engine_config)
-        # async_engine_client = AsyncLLMEngine.from_engine_args(self.engine_args, usage_context=UsageContext.OPENAI_API_SERVER)
-        # model_config = async_engine_client.engine.get_model_config()
-        # request_logger = RequestLogger(max_log_len=self.vllm_model_config["max_log_len"])
-        # self._model["openai_serving_models"] = OpenAIServingModels(
-        # async_engine_client,
-        # self.model_config,
-        # [BaseModelPath(name=self.vllm_engine_config["served_model_name"], model_path=self.vllm_engine_config["model"])],
-        # lora_modules=self.vllm_model_config["lora_modules"],
-        # prompt_adapters=self.vllm_model_config["prompt_adapters"],
-        # )
-        # self._model["openai_serving"] = OpenAIServing(
-        # async_engine_client,
-        # self.model_config,
-        # self._model["openai_serving_models"],
-        # request_logger=request_logger,
-        # return_tokens_as_token_ids=self.vllm_model_config["return_tokens_as_token_ids"]
-        # )
-        # self._model["openai_serving_chat"] = OpenAIServingChat(
-        # async_engine_client,
-        # self.model_config,
-        # self._model["openai_serving_models"],
-        # response_role=self.vllm_model_config["response_role"],
-        # request_logger=request_logger,
-        # chat_template=self.vllm_model_config["chat_template"],
-        # chat_template_content_format=self.chat_settings["chat_template_content_format"],
-        # return_tokens_as_token_ids=self.vllm_model_config["return_tokens_as_token_ids"],
-        # enable_reasoning=self.chat_settings["enable_reasoning"],
-        # reasoning_parser=self.chat_settings["reasoning_parser"],
-        # enable_auto_tools=self.chat_settings["enable_auto_tools"],
-        # tool_parser=self.chat_settings["tool_parser"],
-        # enable_prompt_tokens_details=self.chat_settings["enable_prompt_tokens_details"]
-        # ) if self.model_config.runner_type == "generate" else None
-        # self._model["openai_serving_completion"] = OpenAIServingCompletion(
-        # async_engine_client,
-        # self.model_config,
-        # self._model["openai_serving_models"],
-        # request_logger=request_logger,
-        # return_tokens_as_token_ids=self.vllm_model_config["return_tokens_as_token_ids"]
-        # ) if self.model_config.runner_type == "generate" else None
-        # self._model["openai_serving_embedding"] = OpenAIServingEmbedding(
-        # async_engine_client,
-        # self.model_config,
-        # self._model["openai_serving_models"],
-        # request_logger=request_logger,
-        # chat_template=self.vllm_model_config["chat_template"],
-        # chat_template_content_format=self.chat_settings["chat_template_content_format"]
-        # ) if self.model_config.task == "embed" else None
-        # self._model["openai_serving_tokenization"] = OpenAIServingTokenization(
-        # async_engine_client,
-        # self.model_config,
-        # self._model["openai_serving_models"],
-        # request_logger=request_logger,
-        # chat_template=self.vllm_model_config["chat_template"],
-        # chat_template_content_format=self.chat_settings["chat_template_content_format"]
-        # )
-        # return self._model
         return {
-            # "vllm_engine_config": vllm_engine_config,
             "vllm_model_config": vllm_model_config,
             "chat_settings": chat_settings
         }