diff --git a/backend/config.py b/backend/config.py index 28ace5d5d..761592f38 100644 --- a/backend/config.py +++ b/backend/config.py @@ -574,6 +574,7 @@ ENABLE_COMMUNITY_SHARING = PersistentConfig( os.environ.get("ENABLE_COMMUNITY_SHARING", "True").lower() == "true", ) + class BannerModel(BaseModel): id: str type: str diff --git a/src/lib/apis/streaming/index.ts b/src/lib/apis/streaming/index.ts index bb1b55b1d..f91edad83 100644 --- a/src/lib/apis/streaming/index.ts +++ b/src/lib/apis/streaming/index.ts @@ -8,6 +8,16 @@ type TextStreamUpdate = { citations?: any; // eslint-disable-next-line @typescript-eslint/no-explicit-any error?: any; + usage?: ResponseUsage; +}; + +type ResponseUsage = { + /** Including images and tools if any */ + prompt_tokens: number; + /** The tokens generated */ + completion_tokens: number; + /** Sum of the above two fields */ + total_tokens: number; }; // createOpenAITextStream takes a responseBody with a SSE response, @@ -59,7 +69,11 @@ async function* openAIStreamToIterator( continue; } - yield { done: false, value: parsedData.choices?.[0]?.delta?.content ?? '' }; + yield { + done: false, + value: parsedData.choices?.[0]?.delta?.content ?? '', + usage: parsedData.usage + }; } catch (e) { console.error('Error extracting delta from SSE event:', e); } diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte index d2050dd05..17be99901 100644 --- a/src/lib/components/chat/Chat.svelte +++ b/src/lib/components/chat/Chat.svelte @@ -685,6 +685,12 @@ { model: model.id, stream: true, + stream_options: + model.info?.meta?.capabilities?.usage ?? false + ? { + include_usage: true + } + : undefined, messages: [ $settings.system || (responseMessage?.userContext ?? null) ? { @@ -753,9 +759,10 @@ if (res && res.ok && res.body) { const textStream = await createOpenAITextStream(res.body, $settings.splitLargeChunks); + let lastUsage = null; for await (const update of textStream) { - const { value, done, citations, error } = update; + const { value, done, citations, error, usage } = update; if (error) { await handleOpenAIError(error, null, model, responseMessage); break; @@ -771,6 +778,10 @@ break; } + if (usage) { + lastUsage = usage; + } + if (citations) { responseMessage.citations = citations; continue; @@ -804,6 +815,10 @@ } } + if (lastUsage) { + responseMessage.info = { ...lastUsage, openai: true }; + } + if ($chatId == _chatId) { if ($settings.saveChatHistory ?? true) { chat = await updateChatById(localStorage.token, _chatId, { diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte index c5d753b51..fb08afb5d 100644 --- a/src/lib/components/chat/Messages/ResponseMessage.svelte +++ b/src/lib/components/chat/Messages/ResponseMessage.svelte @@ -106,8 +106,13 @@ renderLatex(); if (message.info) { - tooltipInstance = tippy(`#info-${message.id}`, { - content: `response_token/s: ${ + let tooltipContent = ''; + if (message.info.openai) { + tooltipContent = `prompt_tokens: ${message.info.prompt_tokens ?? 'N/A'}
+ completion_tokens: ${message.info.completion_tokens ?? 'N/A'}
+ total_tokens: ${message.info.total_tokens ?? 'N/A'}`; + } else { + tooltipContent = `response_token/s: ${ `${ Math.round( ((message.info.eval_count ?? 0) / (message.info.eval_duration / 1000000000)) * 100 @@ -137,9 +142,10 @@ eval_duration: ${ Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A' }ms
- approximate_total: ${approximateToHumanReadable( - message.info.total_duration - )}
`, + approximate_total: ${approximateToHumanReadable(message.info.total_duration)}`; + } + tooltipInstance = tippy(`#info-${message.id}`, { + content: `${tooltipContent}`, allowHTML: true }); } diff --git a/src/routes/(app)/workspace/models/create/+page.svelte b/src/routes/(app)/workspace/models/create/+page.svelte index 51d7e2f6a..541bd11fc 100644 --- a/src/routes/(app)/workspace/models/create/+page.svelte +++ b/src/routes/(app)/workspace/models/create/+page.svelte @@ -56,6 +56,20 @@ id = name.replace(/\s+/g, '-').toLowerCase(); } + let baseModel = null; + $: { + baseModel = $models.find((m) => m.id === info.base_model_id); + console.log(baseModel); + if (baseModel) { + if (baseModel.owned_by === 'openai') { + capabilities.usage = baseModel.info?.meta?.capabilities?.usage ?? false; + } else { + delete capabilities.usage; + } + capabilities = capabilities; + } + } + const submitHandler = async () => { loading = true; diff --git a/src/routes/(app)/workspace/models/edit/+page.svelte b/src/routes/(app)/workspace/models/edit/+page.svelte index e21190d6a..a2d5a7d76 100644 --- a/src/routes/(app)/workspace/models/edit/+page.svelte +++ b/src/routes/(app)/workspace/models/edit/+page.svelte @@ -107,6 +107,10 @@ params = { ...params, ...model?.info?.params }; params.stop = params?.stop ? (params?.stop ?? []).join(',') : null; + if (model?.owned_by === 'openai') { + capabilities.usage = false; + } + if (model?.info?.meta?.capabilities) { capabilities = { ...capabilities, ...model?.info?.meta?.capabilities }; }