diff --git a/backend/open_webui/utils/chat.py b/backend/open_webui/utils/chat.py
index 96d7693b5..676a4a203 100644
--- a/backend/open_webui/utils/chat.py
+++ b/backend/open_webui/utils/chat.py
@@ -136,7 +136,7 @@ async def generate_chat_completion(
         response = await generate_ollama_chat_completion(
             request=request, form_data=form_data, user=user, bypass_filter=bypass_filter
         )
-        if form_data.stream:
+        if form_data.get("stream"):
             response.headers["content-type"] = "text/event-stream"
             return StreamingResponse(
                 convert_streaming_response_ollama_to_openai(response),
diff --git a/backend/open_webui/utils/misc.py b/backend/open_webui/utils/misc.py
index a5af492ba..aba696f60 100644
--- a/backend/open_webui/utils/misc.py
+++ b/backend/open_webui/utils/misc.py
@@ -106,7 +106,7 @@ def openai_chat_message_template(model: str):
 
 
 def openai_chat_chunk_message_template(
-    model: str, message: Optional[str] = None
+    model: str, message: Optional[str] = None, usage: Optional[dict] = None
 ) -> dict:
     template = openai_chat_message_template(model)
     template["object"] = "chat.completion.chunk"
@@ -114,17 +114,23 @@ def openai_chat_chunk_message_template(
         template["choices"][0]["delta"] = {"content": message}
     else:
         template["choices"][0]["finish_reason"] = "stop"
+
+    if usage:
+        template["usage"] = usage
     return template
 
 
 def openai_chat_completion_message_template(
-    model: str, message: Optional[str] = None
+    model: str, message: Optional[str] = None, usage: Optional[dict] = None
 ) -> dict:
     template = openai_chat_message_template(model)
     template["object"] = "chat.completion"
     if message is not None:
         template["choices"][0]["message"] = {"content": message, "role": "assistant"}
     template["choices"][0]["finish_reason"] = "stop"
+
+    if usage:
+        template["usage"] = usage
     return template
diff --git a/backend/open_webui/utils/response.py b/backend/open_webui/utils/response.py
index b8501e92c..891016e43 100644
--- a/backend/open_webui/utils/response.py
+++ b/backend/open_webui/utils/response.py
@@ -21,8 +21,45 @@ async def convert_streaming_response_ollama_to_openai(ollama_streaming_response):
         message_content = data.get("message", {}).get("content", "")
         done = data.get("done", False)
 
+        usage = None
+        if done:
+            usage = {
+                # Ollama reports durations in nanoseconds; convert to
+                # tokens per second, rounded to two decimals.
+                "response_token/s": (
+                    round(
+                        data.get("eval_count", 0)
+                        / (data.get("eval_duration", 0) / 1_000_000_000),
+                        2,
+                    )
+                    if data.get("eval_duration", 0) > 0
+                    else "N/A"
+                ),
+                "prompt_token/s": (
+                    round(
+                        data.get("prompt_eval_count", 0)
+                        / (data.get("prompt_eval_duration", 0) / 1_000_000_000),
+                        2,
+                    )
+                    if data.get("prompt_eval_duration", 0) > 0
+                    else "N/A"
+                ),
+                # Durations are converted from nanoseconds to milliseconds.
+                "total_duration": round(data.get("total_duration", 0) / 1_000_000, 2),
+                "load_duration": round(data.get("load_duration", 0) / 1_000_000, 2),
+                "prompt_eval_count": data.get("prompt_eval_count", 0),
+                "prompt_eval_duration": round(
+                    data.get("prompt_eval_duration", 0) / 1_000_000, 2
+                ),
+                "eval_count": data.get("eval_count", 0),
+                "eval_duration": round(data.get("eval_duration", 0) / 1_000_000, 2),
+                "approximate_total": (
+                    lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s"
+                )((data.get("total_duration", 0) or 0) // 1_000_000_000),
+            }
+
         data = openai_chat_chunk_message_template(
-            model, message_content if not done else None
+            model, message_content if not done else None, usage
         )
 
         line = f"data: {json.dumps(data)}\n\n"
diff --git a/src/lib/apis/streaming/index.ts b/src/lib/apis/streaming/index.ts
index 54804385d..5617ce36c 100644
--- a/src/lib/apis/streaming/index.ts
+++ b/src/lib/apis/streaming/index.ts
@@ -77,10 +77,14 @@ async function* openAIStreamToIterator(
 				continue;
 			}
 
+			if (parsedData.usage) {
+				yield { done: false, value: '', usage: parsedData.usage };
+				continue;
+			}
+
 			yield {
 				done: false,
 				value: parsedData.choices?.[0]?.delta?.content ?? '',
-				usage: parsedData.usage
 			};
 		} catch (e) {
 			console.error('Error extracting delta from SSE event:', e);
@@ -98,10 +102,26 @@ async function* streamLargeDeltasAsRandomChunks(
 			yield textStreamUpdate;
 			return;
 		}
+
+		if (textStreamUpdate.error) {
+			yield textStreamUpdate;
+			continue;
+		}
 		if (textStreamUpdate.sources) {
 			yield textStreamUpdate;
 			continue;
 		}
+		if (textStreamUpdate.selectedModelId) {
+			yield textStreamUpdate;
+			continue;
+		}
+		if (textStreamUpdate.usage) {
+			yield textStreamUpdate;
+			continue;
+		}
+
+
 		let content = textStreamUpdate.value;
 		if (content.length < 5) {
 			yield { done: false, value: content };
diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte
index e6a653420..a55cbc87b 100644
--- a/src/lib/components/chat/Chat.svelte
+++ b/src/lib/components/chat/Chat.svelte
@@ -455,41 +455,43 @@
 	//////////////////////////
 
 	const initNewChat = async () => {
-		if (sessionStorage.selectedModels) {
-			selectedModels = JSON.parse(sessionStorage.selectedModels);
-			sessionStorage.removeItem('selectedModels');
-		} else {
-			if ($page.url.searchParams.get('models')) {
-				selectedModels = $page.url.searchParams.get('models')?.split(',');
-			} else if ($page.url.searchParams.get('model')) {
-				const urlModels = $page.url.searchParams.get('model')?.split(',');
+		if ($page.url.searchParams.get('models')) {
+			selectedModels = $page.url.searchParams.get('models')?.split(',');
+		} else if ($page.url.searchParams.get('model')) {
+			const urlModels = $page.url.searchParams.get('model')?.split(',');
 
-				if (urlModels.length === 1) {
-					const m = $models.find((m) => m.id === urlModels[0]);
-					if (!m) {
-						const modelSelectorButton = document.getElementById('model-selector-0-button');
-						if (modelSelectorButton) {
-							modelSelectorButton.click();
-							await tick();
+			if (urlModels.length === 1) {
+				const m = $models.find((m) => m.id === urlModels[0]);
+				if (!m) {
+					const modelSelectorButton = document.getElementById('model-selector-0-button');
+					if (modelSelectorButton) {
+						modelSelectorButton.click();
+						await tick();
 
-							const modelSelectorInput = document.getElementById('model-search-input');
-							if (modelSelectorInput) {
-								modelSelectorInput.focus();
-								modelSelectorInput.value = urlModels[0];
-								modelSelectorInput.dispatchEvent(new Event('input'));
-							}
-						}
-					} else {
-						selectedModels = urlModels;
-					}
-				} else {
-					selectedModels = urlModels;
-				}
-			} else if ($settings?.models) {
-				selectedModels = $settings?.models;
-			} else if ($config?.default_models) {
-				console.log($config?.default_models.split(',') ?? '');
-				selectedModels = $config?.default_models.split(',');
+						const modelSelectorInput = document.getElementById('model-search-input');
+						if (modelSelectorInput) {
+							modelSelectorInput.focus();
+							modelSelectorInput.value = urlModels[0];
+							modelSelectorInput.dispatchEvent(new Event('input'));
+						}
+					}
+				} else {
+					selectedModels = urlModels;
+				}
+			} else {
+				selectedModels = urlModels;
+			}
+		} else {
+			if (sessionStorage.selectedModels) {
+				selectedModels = JSON.parse(sessionStorage.selectedModels);
+				sessionStorage.removeItem('selectedModels');
+			} else {
+				if ($settings?.models) {
+					selectedModels = $settings?.models;
+				} else if ($config?.default_models) {
+					console.log($config?.default_models.split(',') ?? '');
+					selectedModels = $config?.default_models.split(',');
+				}
 			}
 		}
 
@@ -1056,11 +1058,14 @@
 		}
 
 		let _response = null;
-		if (model?.owned_by === 'ollama') {
-			_response = await sendPromptOllama(model, prompt, responseMessageId, _chatId);
-		} else if (model) {
-			_response = await sendPromptOpenAI(model, prompt, responseMessageId, _chatId);
-		}
+
+		// if (model?.owned_by === 'ollama') {
+		// 	_response = await sendPromptOllama(model, prompt, responseMessageId, _chatId);
+		// } else if (model) {
+		// }
+
+		_response = await sendPromptOpenAI(model, prompt, responseMessageId, _chatId);
+
 		_responses.push(_response);
 
 		if (chatEventEmitter) clearInterval(chatEventEmitter);
@@ -1207,24 +1212,14 @@
 			$settings?.params?.stream_response ??
 			params?.stream_response ??
 			true;
+
 		const [res, controller] = await generateChatCompletion(localStorage.token, {
 			stream: stream,
 			model: model.id,
 			messages: messagesBody,
-			options: {
-				...{ ...($settings?.params ?? {}), ...params },
-				stop:
-					(params?.stop ?? $settings?.params?.stop ?? undefined)
-						? (params?.stop.split(',').map((token) => token.trim()) ?? $settings.params.stop).map(
-								(str) => decodeURIComponent(JSON.parse('"' + str.replace(/\"/g, '\\"') + '"'))
-							)
-						: undefined,
-				num_predict: params?.max_tokens ?? $settings?.params?.max_tokens ?? undefined,
-				repeat_penalty:
-					params?.frequency_penalty ?? $settings?.params?.frequency_penalty ?? undefined
-			},
 			format: $settings.requestFormat ?? undefined,
 			keep_alive: $settings.keepAlive ?? undefined,
+			tool_ids: selectedToolIds.length > 0 ? selectedToolIds : undefined,
 			files: files.length > 0 ? files : undefined,
 			session_id: $socket?.id,
 			chat_id: $chatId,
@@ -1542,13 +1537,6 @@
 		{
 			stream: stream,
 			model: model.id,
-			...(stream && (model.info?.meta?.capabilities?.usage ?? false)
-				? {
-						stream_options: {
-							include_usage: true
-						}
-					}
-				: {}),
 			messages: [
 				params?.system || $settings.system || (responseMessage?.userContext ?? null)
 					? {
@@ -1593,23 +1581,36 @@
 					content: message?.merged?.content ?? message.content
 				})
 			})),
-			seed: params?.seed ?? $settings?.params?.seed ?? undefined,
-			stop:
-				(params?.stop ?? $settings?.params?.stop ?? undefined)
-					? (params?.stop.split(',').map((token) => token.trim()) ?? $settings.params.stop).map(
-							(str) => decodeURIComponent(JSON.parse('"' + str.replace(/\"/g, '\\"') + '"'))
-						)
-					: undefined,
-			temperature: params?.temperature ?? $settings?.params?.temperature ?? undefined,
-			top_p: params?.top_p ?? $settings?.params?.top_p ?? undefined,
-			frequency_penalty:
-				params?.frequency_penalty ?? $settings?.params?.frequency_penalty ?? undefined,
-			max_tokens: params?.max_tokens ?? $settings?.params?.max_tokens ?? undefined,
+
+			// params: {
+			// 	...$settings?.params,
+			// 	...params,
+
+			// 	format: $settings.requestFormat ?? undefined,
+			// 	keep_alive: $settings.keepAlive ?? undefined,
+			// 	stop:
+			// 		(params?.stop ?? $settings?.params?.stop ?? undefined)
+			// 			? (
+			// 					params?.stop.split(',').map((token) => token.trim()) ?? $settings.params.stop
+			// 				).map((str) =>
+			// 					decodeURIComponent(JSON.parse('"' + str.replace(/\"/g, '\\"') + '"'))
+			// 				)
+			// 			: undefined
+			// },
+
 			tool_ids: selectedToolIds.length > 0 ? selectedToolIds : undefined,
 			files: files.length > 0 ? files : undefined,
 			session_id: $socket?.id,
 			chat_id: $chatId,
-			id: responseMessageId
+			id: responseMessageId,
+
+			...(stream && (model.info?.meta?.capabilities?.usage ?? false)
+				? {
+						stream_options: {
+							include_usage: true
+						}
+					}
+				: {})
 		},
 		`${WEBUI_BASE_URL}/api`
 	);
@@ -1636,6 +1637,7 @@
 				await handleOpenAIError(error, null, model, responseMessage);
 				break;
 			}
+
 			if (done || stopResponseFlag || _chatId !== $chatId) {
 				responseMessage.done = true;
 				history.messages[responseMessageId] = responseMessage;
@@ -1648,7 +1650,7 @@
 			}
 
 			if (usage) {
-				responseMessage.info = { ...usage, openai: true, usage };
+				responseMessage.usage = usage;
 			}
 
 			if (selectedModelId) {
diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte
index 2e883df93..76210f68c 100644
--- a/src/lib/components/chat/Messages/ResponseMessage.svelte
+++ b/src/lib/components/chat/Messages/ResponseMessage.svelte
@@ -932,82 +932,45 @@
 							{/if}
 
-							{#if message.info}
+							{#if message.usage}
 								<Tooltip
-									content={message.info.openai
-										? message.info.usage
-											? `<pre>${sanitizeResponseContent(
-													JSON.stringify(message.info.usage, null, 2)
-														.replace(/"([^(")"]+)":/g, '$1:')
-														.slice(1, -1)
-														.split('\n')
-														.map((line) => line.slice(2))
-														.map((line) => (line.endsWith(',') ? line.slice(0, -1) : line))
-														.join('\n')
-												)}</pre>`
-											: `prompt_tokens: ${message.info.prompt_tokens ?? 'N/A'}<br/>
-												completion_tokens: ${message.info.completion_tokens ?? 'N/A'}<br/>
-												total_tokens: ${message.info.total_tokens ?? 'N/A'}`
-										: `response_token/s: ${
-												`${
-													Math.round(
-														((message.info.eval_count ?? 0) /
-															((message.info.eval_duration ?? 0) / 1000000000)) *
-															100
-													) / 100
-												} tokens` ?? 'N/A'
-											}<br/>
-											prompt_token/s: ${
-												Math.round(
-													((message.info.prompt_eval_count ?? 0) /
-														((message.info.prompt_eval_duration ?? 0) / 1000000000)) *
-														100
-												) / 100 ?? 'N/A'
-											} tokens<br/>
-											total_duration: ${
-												Math.round(((message.info.total_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
-											}ms<br/>
-											load_duration: ${
-												Math.round(((message.info.load_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
-											}ms<br/>
-											prompt_eval_count: ${message.info.prompt_eval_count ?? 'N/A'}<br/>
-											prompt_eval_duration: ${
-												Math.round(((message.info.prompt_eval_duration ?? 0) / 1000000) * 100) / 100 ??
-												'N/A'
-											}ms<br/>
-											eval_count: ${message.info.eval_count ?? 'N/A'}<br/>
-											eval_duration: ${
-												Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
-											}ms<br/>
-											approximate_total: ${approximateToHumanReadable(message.info.total_duration ?? 0)}`}
-									placement="top"
+									content={message.usage
+										? `<pre>${sanitizeResponseContent(
+												JSON.stringify(message.usage, null, 2)
+													.replace(/"([^(")"]+)":/g, '$1:')
+													.slice(1, -1)
+													.split('\n')
+													.map((line) => line.slice(2))
+													.map((line) => (line.endsWith(',') ? line.slice(0, -1) : line))
+													.join('\n')
+											)}</pre>`
+										: ''}
+									placement="bottom"
 								>
-
-
-
+
+
+
 								</Tooltip>
{/if}
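
For context on the end-to-end effect of this diff: when an Ollama stream finishes (`done: true`), the backend now computes a `usage` block and attaches it to the final OpenAI-style chunk; the frontend's new `parsedData.usage` branch yields it as its own stream update, and it is stored on `responseMessage.usage` for the tooltip. Below is a minimal, illustrative sketch of that arithmetic and the resulting SSE payload. The sample numbers are made up, and the chunk shape is simplified (the real `openai_chat_chunk_message_template` also fills fields like `id`, `created`, and `model`).

```python
import json

# Illustrative final message from Ollama's /api/chat stream (sample values only).
data = {
    "done": True,
    "eval_count": 180,                    # completion tokens generated
    "eval_duration": 4_500_000_000,       # ns -> 4.5 s
    "prompt_eval_count": 25,
    "prompt_eval_duration": 500_000_000,  # ns -> 0.5 s
    "total_duration": 5_200_000_000,      # ns
}

# Same arithmetic as the response.py hunk above: tokens/s, and ns -> ms.
usage = {
    "response_token/s": round(
        data["eval_count"] / (data["eval_duration"] / 1_000_000_000), 2
    ),  # 40.0
    "prompt_token/s": round(
        data["prompt_eval_count"] / (data["prompt_eval_duration"] / 1_000_000_000), 2
    ),  # 50.0
    "total_duration": round(data["total_duration"] / 1_000_000, 2),  # 5200.0 ms
    "approximate_total": (lambda s: f"{s // 3600}h{(s % 3600) // 60}m{s % 60}s")(
        data["total_duration"] // 1_000_000_000
    ),  # "0h0m5s"
}

# Simplified shape of the terminating SSE chunk: the usage block rides along
# with the stop signal instead of being a content delta.
chunk = {
    "object": "chat.completion.chunk",
    "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
    "usage": usage,
}
print(f"data: {json.dumps(chunk)}\n")
```

This also illustrates why the frontend change is needed: a chunk carrying `usage` has no `choices[0].delta.content`, so treating it as a text delta would yield an empty string; the dedicated `if (parsedData.usage)` branch forwards it untouched.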