diff --git a/src/lib/apis/streaming/index.ts b/src/lib/apis/streaming/index.ts index bb1b55b1d..f91edad83 100644 --- a/src/lib/apis/streaming/index.ts +++ b/src/lib/apis/streaming/index.ts @@ -8,6 +8,16 @@ type TextStreamUpdate = { citations?: any; // eslint-disable-next-line @typescript-eslint/no-explicit-any error?: any; + usage?: ResponseUsage; +}; + +type ResponseUsage = { + /** Including images and tools if any */ + prompt_tokens: number; + /** The tokens generated */ + completion_tokens: number; + /** Sum of the above two fields */ + total_tokens: number; }; // createOpenAITextStream takes a responseBody with a SSE response, @@ -59,7 +69,11 @@ async function* openAIStreamToIterator( continue; } - yield { done: false, value: parsedData.choices?.[0]?.delta?.content ?? '' }; + yield { + done: false, + value: parsedData.choices?.[0]?.delta?.content ?? '', + usage: parsedData.usage + }; } catch (e) { console.error('Error extracting delta from SSE event:', e); } diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte index c3ff70903..ffbed16a7 100644 --- a/src/lib/components/chat/Chat.svelte +++ b/src/lib/components/chat/Chat.svelte @@ -767,6 +767,12 @@ { model: model.id, stream: true, + stream_options: + model.info?.meta?.capabilities?.usage ?? false + ? { + include_usage: true + } + : undefined, messages: [ $settings.system || (responseMessage?.userContext ?? null) ? { @@ -835,9 +841,10 @@ if (res && res.ok && res.body) { const textStream = await createOpenAITextStream(res.body, $settings.splitLargeChunks); + let lastUsage = null; for await (const update of textStream) { - const { value, done, citations, error } = update; + const { value, done, citations, error, usage } = update; if (error) { await handleOpenAIError(error, null, model, responseMessage); break; @@ -853,6 +860,10 @@ break; } + if (usage) { + lastUsage = usage; + } + if (citations) { responseMessage.citations = citations; continue; @@ -886,6 +897,10 @@ } } + if (lastUsage) { + responseMessage.info = { ...lastUsage, openai: true }; + } + if ($chatId == _chatId) { if ($settings.saveChatHistory ?? true) { chat = await updateChatById(localStorage.token, _chatId, { diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index 5db52b49f..a8e03bcf1 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -108,8 +108,13 @@ renderLatex(); if (message.info) { - tooltipInstance = tippy(`#info-${message.id}`, { - content: `response_token/s: ${ + let tooltipContent = ''; + if (message.info.openai) { + tooltipContent = `prompt_tokens: ${message.info.prompt_tokens ?? 'N/A'}
+ completion_tokens: ${message.info.completion_tokens ?? 'N/A'}
+ total_tokens: ${message.info.total_tokens ?? 'N/A'}`; + } else { + tooltipContent = `response_token/s: ${ `${ Math.round( ((message.info.eval_count ?? 0) / (message.info.eval_duration / 1000000000)) * 100 @@ -139,9 +144,10 @@ eval_duration: ${ Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A' }ms
- approximate_total: ${approximateToHumanReadable( - message.info.total_duration - )}
`, + approximate_total: ${approximateToHumanReadable(message.info.total_duration)}`; + } + tooltipInstance = tippy(`#info-${message.id}`, { + content: `${tooltipContent}`, allowHTML: true }); } diff --git a/src/routes/(app)/workspace/models/create/+page.svelte b/src/routes/(app)/workspace/models/create/+page.svelte index 51d7e2f6a..541bd11fc 100644 --- a/src/routes/(app)/workspace/models/create/+page.svelte +++ b/src/routes/(app)/workspace/models/create/+page.svelte @@ -56,6 +56,20 @@ id = name.replace(/\s+/g, '-').toLowerCase(); } + let baseModel = null; + $: { + baseModel = $models.find((m) => m.id === info.base_model_id); + console.log(baseModel); + if (baseModel) { + if (baseModel.owned_by === 'openai') { + capabilities.usage = baseModel.info?.meta?.capabilities?.usage ?? false; + } else { + delete capabilities.usage; + } + capabilities = capabilities; + } + } + const submitHandler = async () => { loading = true; diff --git a/src/routes/(app)/workspace/models/edit/+page.svelte b/src/routes/(app)/workspace/models/edit/+page.svelte index e21190d6a..a2d5a7d76 100644 --- a/src/routes/(app)/workspace/models/edit/+page.svelte +++ b/src/routes/(app)/workspace/models/edit/+page.svelte @@ -107,6 +107,10 @@ params = { ...params, ...model?.info?.params }; params.stop = params?.stop ? (params?.stop ?? []).join(',') : null; + if (model?.owned_by === 'openai') { + capabilities.usage = false; + } + if (model?.info?.meta?.capabilities) { capabilities = { ...capabilities, ...model?.info?.meta?.capabilities }; }