diff --git a/backend/config.py b/backend/config.py
index 28ace5d5d..761592f38 100644
--- a/backend/config.py
+++ b/backend/config.py
@@ -574,6 +574,7 @@ ENABLE_COMMUNITY_SHARING = PersistentConfig(
os.environ.get("ENABLE_COMMUNITY_SHARING", "True").lower() == "true",
)
+
class BannerModel(BaseModel):
id: str
type: str
diff --git a/src/lib/apis/streaming/index.ts b/src/lib/apis/streaming/index.ts
index bb1b55b1d..f91edad83 100644
--- a/src/lib/apis/streaming/index.ts
+++ b/src/lib/apis/streaming/index.ts
@@ -8,6 +8,16 @@ type TextStreamUpdate = {
citations?: any;
// eslint-disable-next-line @typescript-eslint/no-explicit-any
error?: any;
+ usage?: ResponseUsage;
+};
+
+type ResponseUsage = {
+ /** Number of tokens in the prompt, including images and tools if any */
+ prompt_tokens: number;
+ /** Number of tokens generated in the completion */
+ completion_tokens: number;
+ /** Sum of prompt_tokens and completion_tokens */
+ total_tokens: number;
};
// createOpenAITextStream takes a responseBody with a SSE response,
@@ -59,7 +69,11 @@ async function* openAIStreamToIterator(
continue;
}
- yield { done: false, value: parsedData.choices?.[0]?.delta?.content ?? '' };
+ yield {
+ done: false,
+ value: parsedData.choices?.[0]?.delta?.content ?? '',
+ usage: parsedData.usage
+ };
} catch (e) {
console.error('Error extracting delta from SSE event:', e);
}
diff --git a/src/lib/components/chat/Chat.svelte b/src/lib/components/chat/Chat.svelte
index d2050dd05..17be99901 100644
--- a/src/lib/components/chat/Chat.svelte
+++ b/src/lib/components/chat/Chat.svelte
@@ -685,6 +685,12 @@
{
model: model.id,
stream: true,
+ stream_options:
+ model.info?.meta?.capabilities?.usage ?? false
+ ? {
+ include_usage: true
+ }
+ : undefined,
messages: [
$settings.system || (responseMessage?.userContext ?? null)
? {
@@ -753,9 +759,10 @@
if (res && res.ok && res.body) {
const textStream = await createOpenAITextStream(res.body, $settings.splitLargeChunks);
+ let lastUsage = null;
for await (const update of textStream) {
- const { value, done, citations, error } = update;
+ const { value, done, citations, error, usage } = update;
if (error) {
await handleOpenAIError(error, null, model, responseMessage);
break;
@@ -771,6 +778,10 @@
break;
}
+ if (usage) {
+ lastUsage = usage;
+ }
+
if (citations) {
responseMessage.citations = citations;
continue;
@@ -804,6 +815,10 @@
}
}
+ if (lastUsage) {
+ responseMessage.info = { ...lastUsage, openai: true };
+ }
+
if ($chatId == _chatId) {
if ($settings.saveChatHistory ?? true) {
chat = await updateChatById(localStorage.token, _chatId, {
diff --git a/src/lib/components/chat/Messages/ResponseMessage.svelte b/src/lib/components/chat/Messages/ResponseMessage.svelte
index c5d753b51..fb08afb5d 100644
--- a/src/lib/components/chat/Messages/ResponseMessage.svelte
+++ b/src/lib/components/chat/Messages/ResponseMessage.svelte
@@ -106,8 +106,13 @@
renderLatex();
if (message.info) {
- tooltipInstance = tippy(`#info-${message.id}`, {
- content: `response_token/s: ${
+ let tooltipContent = ''; // rendered as HTML by tippy (allowHTML: true), so rows need <br/>, not bare newlines
+ if (message.info.openai) {
+ tooltipContent = `prompt_tokens: ${message.info.prompt_tokens ?? 'N/A'}<br/>
+ completion_tokens: ${message.info.completion_tokens ?? 'N/A'}<br/>
+ total_tokens: ${message.info.total_tokens ?? 'N/A'}`;
+ } else {
+ tooltipContent = `response_token/s: ${
`${
Math.round(
((message.info.eval_count ?? 0) / (message.info.eval_duration / 1000000000)) * 100
@@ -137,9 +142,10 @@
eval_duration: ${
Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
}ms
- approximate_total: ${approximateToHumanReadable(
- message.info.total_duration
- )}`,
+ approximate_total: ${approximateToHumanReadable(message.info.total_duration)}`;
+ }
+ tooltipInstance = tippy(`#info-${message.id}`, {
+ content: `${tooltipContent}`,
allowHTML: true
});
}
diff --git a/src/routes/(app)/workspace/models/create/+page.svelte b/src/routes/(app)/workspace/models/create/+page.svelte
index 51d7e2f6a..541bd11fc 100644
--- a/src/routes/(app)/workspace/models/create/+page.svelte
+++ b/src/routes/(app)/workspace/models/create/+page.svelte
@@ -56,6 +56,20 @@
id = name.replace(/\s+/g, '-').toLowerCase();
}
+ let baseModel = null; // entry from $models whose id matches info.base_model_id, if any
+ $: {
+ baseModel = $models.find((m) => m.id === info.base_model_id);
+ // The usage capability is only kept for base models owned by 'openai'; for any other owner the key is removed.
+ if (baseModel) {
+ if (baseModel.owned_by === 'openai') {
+ capabilities.usage = baseModel.info?.meta?.capabilities?.usage ?? false;
+ } else {
+ delete capabilities.usage;
+ }
+ capabilities = capabilities; // self-assignment so Svelte notices the in-place mutation above
+ }
+ }
+
const submitHandler = async () => {
loading = true;
diff --git a/src/routes/(app)/workspace/models/edit/+page.svelte b/src/routes/(app)/workspace/models/edit/+page.svelte
index e21190d6a..a2d5a7d76 100644
--- a/src/routes/(app)/workspace/models/edit/+page.svelte
+++ b/src/routes/(app)/workspace/models/edit/+page.svelte
@@ -107,6 +107,10 @@
params = { ...params, ...model?.info?.params };
params.stop = params?.stop ? (params?.stop ?? []).join(',') : null;
+ if (model?.owned_by === 'openai') {
+ capabilities.usage = false;
+ }
+
if (model?.info?.meta?.capabilities) {
capabilities = { ...capabilities, ...model?.info?.meta?.capabilities };
}