This commit is contained in:
Timothy Jaeryang Baek 2024-12-12 23:31:08 -08:00
parent 9a081c8593
commit f9a05dd1e1
6 changed files with 190 additions and 144 deletions

View File

@ -136,7 +136,7 @@ async def generate_chat_completion(
response = await generate_ollama_chat_completion(
request=request, form_data=form_data, user=user, bypass_filter=bypass_filter
)
if form_data.stream:
if form_data.get("stream"):
response.headers["content-type"] = "text/event-stream"
return StreamingResponse(
convert_streaming_response_ollama_to_openai(response),

View File

@ -106,7 +106,7 @@ def openai_chat_message_template(model: str):
def openai_chat_chunk_message_template(
model: str, message: Optional[str] = None
model: str, message: Optional[str] = None, usage: Optional[dict] = None
) -> dict:
template = openai_chat_message_template(model)
template["object"] = "chat.completion.chunk"
@ -114,17 +114,23 @@ def openai_chat_chunk_message_template(
template["choices"][0]["delta"] = {"content": message}
else:
template["choices"][0]["finish_reason"] = "stop"
if usage:
template["usage"] = usage
return template
# Build a non-streaming "chat.completion" payload on top of the base
# template returned by openai_chat_message_template(model).
#
#   model:   forwarded to openai_chat_message_template.
#   message: when not None, stored as the assistant message of choices[0].
#   usage:   when truthy, attached unchanged under the "usage" key.
#
# Returns the populated template dict.
def openai_chat_completion_message_template(
# NOTE(review): two parameter lines follow; they look like the pre- and
# post-change signature from a diff (the second one adds `usage`) — confirm.
model: str, message: Optional[str] = None
model: str, message: Optional[str] = None, usage: Optional[dict] = None
) -> dict:
template = openai_chat_message_template(model)
template["object"] = "chat.completion"
if message is not None:
template["choices"][0]["message"] = {"content": message, "role": "assistant"}
# NOTE(review): indentation is lost in this listing, so it is unclear whether
# finish_reason is set only when a message is given or unconditionally —
# confirm against the real file.
template["choices"][0]["finish_reason"] = "stop"
# Only a truthy usage value is copied; None or an empty dict leaves the
# template without a "usage" key.
if usage:
template["usage"] = usage
return template

View File

@ -21,8 +21,63 @@ async def convert_streaming_response_ollama_to_openai(ollama_streaming_response)
message_content = data.get("message", {}).get("content", "")
done = data.get("done", False)
usage = None
if done:
    # Build the usage statistics attached to the final streamed chunk.
    #
    # The raw durations are divided by 1e9 to obtain seconds and by 1e6 to
    # obtain milliseconds, i.e. they are treated as nanoseconds.
    #
    # Fix: the previous expression was round(x * 100, 2), which reports
    # values 100x too large. The intended result is "x rounded to two
    # decimals" (the frontend formula was Math.round(x * 100) / 100), which
    # in Python is simply round(x, 2).
    #
    # `or 0` also covers keys that are present but None, so the comparisons
    # and divisions below cannot raise TypeError.
    eval_count = data.get("eval_count", 0) or 0
    eval_duration_ns = data.get("eval_duration", 0) or 0
    prompt_eval_count = data.get("prompt_eval_count", 0) or 0
    prompt_eval_duration_ns = data.get("prompt_eval_duration", 0) or 0
    total_duration_ns = data.get("total_duration", 0) or 0
    load_duration_ns = data.get("load_duration", 0) or 0

    # Whole seconds, used only for the human-readable h/m/s summary.
    total_seconds = total_duration_ns // 1_000_000_000

    usage = {
        # Tokens per second; "N/A" when no duration was reported, which also
        # avoids a division by zero.
        "response_token/s": (
            round(eval_count / (eval_duration_ns / 1_000_000_000), 2)
            if eval_duration_ns > 0
            else "N/A"
        ),
        "prompt_token/s": (
            round(
                prompt_eval_count / (prompt_eval_duration_ns / 1_000_000_000),
                2,
            )
            if prompt_eval_duration_ns > 0
            else "N/A"
        ),
        # Durations in milliseconds, rounded to two decimals.
        "total_duration": round(total_duration_ns / 1_000_000, 2),
        "load_duration": round(load_duration_ns / 1_000_000, 2),
        "prompt_eval_count": prompt_eval_count,
        "prompt_eval_duration": round(prompt_eval_duration_ns / 1_000_000, 2),
        "eval_count": eval_count,
        "eval_duration": round(eval_duration_ns / 1_000_000, 2),
        "approximate_total": (
            f"{total_seconds // 3600}h"
            f"{(total_seconds % 3600) // 60}m"
            f"{total_seconds % 60}s"
        ),
    }
data = openai_chat_chunk_message_template(
model, message_content if not done else None
model, message_content if not done else None, usage
)
line = f"data: {json.dumps(data)}\n\n"

View File

@ -77,10 +77,14 @@ async function* openAIStreamToIterator(
continue;
}
if (parsedData.usage) {
yield { done: false, value: '', usage: parsedData.usage };
continue;
}
yield {
done: false,
value: parsedData.choices?.[0]?.delta?.content ?? '',
usage: parsedData.usage
};
} catch (e) {
console.error('Error extracting delta from SSE event:', e);
@ -98,10 +102,26 @@ async function* streamLargeDeltasAsRandomChunks(
yield textStreamUpdate;
return;
}
if (textStreamUpdate.error) {
yield textStreamUpdate;
continue;
}
if (textStreamUpdate.sources) {
yield textStreamUpdate;
continue;
}
if (textStreamUpdate.selectedModelId) {
yield textStreamUpdate;
continue;
}
if (textStreamUpdate.usage) {
yield textStreamUpdate;
continue;
}
let content = textStreamUpdate.value;
if (content.length < 5) {
yield { done: false, value: content };

View File

@ -455,41 +455,43 @@
//////////////////////////
const initNewChat = async () => {
if (sessionStorage.selectedModels) {
selectedModels = JSON.parse(sessionStorage.selectedModels);
sessionStorage.removeItem('selectedModels');
} else {
if ($page.url.searchParams.get('models')) {
selectedModels = $page.url.searchParams.get('models')?.split(',');
} else if ($page.url.searchParams.get('model')) {
const urlModels = $page.url.searchParams.get('model')?.split(',');
if ($page.url.searchParams.get('models')) {
selectedModels = $page.url.searchParams.get('models')?.split(',');
} else if ($page.url.searchParams.get('model')) {
const urlModels = $page.url.searchParams.get('model')?.split(',');
if (urlModels.length === 1) {
const m = $models.find((m) => m.id === urlModels[0]);
if (!m) {
const modelSelectorButton = document.getElementById('model-selector-0-button');
if (modelSelectorButton) {
modelSelectorButton.click();
await tick();
if (urlModels.length === 1) {
const m = $models.find((m) => m.id === urlModels[0]);
if (!m) {
const modelSelectorButton = document.getElementById('model-selector-0-button');
if (modelSelectorButton) {
modelSelectorButton.click();
await tick();
const modelSelectorInput = document.getElementById('model-search-input');
if (modelSelectorInput) {
modelSelectorInput.focus();
modelSelectorInput.value = urlModels[0];
modelSelectorInput.dispatchEvent(new Event('input'));
}
const modelSelectorInput = document.getElementById('model-search-input');
if (modelSelectorInput) {
modelSelectorInput.focus();
modelSelectorInput.value = urlModels[0];
modelSelectorInput.dispatchEvent(new Event('input'));
}
} else {
selectedModels = urlModels;
}
} else {
selectedModels = urlModels;
}
} else if ($settings?.models) {
selectedModels = $settings?.models;
} else if ($config?.default_models) {
console.log($config?.default_models.split(',') ?? '');
selectedModels = $config?.default_models.split(',');
} else {
selectedModels = urlModels;
}
} else {
if (sessionStorage.selectedModels) {
selectedModels = JSON.parse(sessionStorage.selectedModels);
sessionStorage.removeItem('selectedModels');
} else {
if ($settings?.models) {
selectedModels = $settings?.models;
} else if ($config?.default_models) {
console.log($config?.default_models.split(',') ?? '');
selectedModels = $config?.default_models.split(',');
}
}
}
@ -1056,11 +1058,14 @@
}
let _response = null;
if (model?.owned_by === 'ollama') {
_response = await sendPromptOllama(model, prompt, responseMessageId, _chatId);
} else if (model) {
_response = await sendPromptOpenAI(model, prompt, responseMessageId, _chatId);
}
// if (model?.owned_by === 'ollama') {
// _response = await sendPromptOllama(model, prompt, responseMessageId, _chatId);
// } else if (model) {
// }
_response = await sendPromptOpenAI(model, prompt, responseMessageId, _chatId);
_responses.push(_response);
if (chatEventEmitter) clearInterval(chatEventEmitter);
@ -1207,24 +1212,14 @@
$settings?.params?.stream_response ??
params?.stream_response ??
true;
const [res, controller] = await generateChatCompletion(localStorage.token, {
stream: stream,
model: model.id,
messages: messagesBody,
options: {
...{ ...($settings?.params ?? {}), ...params },
stop:
(params?.stop ?? $settings?.params?.stop ?? undefined)
? (params?.stop.split(',').map((token) => token.trim()) ?? $settings.params.stop).map(
(str) => decodeURIComponent(JSON.parse('"' + str.replace(/\"/g, '\\"') + '"'))
)
: undefined,
num_predict: params?.max_tokens ?? $settings?.params?.max_tokens ?? undefined,
repeat_penalty:
params?.frequency_penalty ?? $settings?.params?.frequency_penalty ?? undefined
},
format: $settings.requestFormat ?? undefined,
keep_alive: $settings.keepAlive ?? undefined,
tool_ids: selectedToolIds.length > 0 ? selectedToolIds : undefined,
files: files.length > 0 ? files : undefined,
session_id: $socket?.id,
@ -1542,13 +1537,6 @@
{
stream: stream,
model: model.id,
...(stream && (model.info?.meta?.capabilities?.usage ?? false)
? {
stream_options: {
include_usage: true
}
}
: {}),
messages: [
params?.system || $settings.system || (responseMessage?.userContext ?? null)
? {
@ -1593,23 +1581,36 @@
content: message?.merged?.content ?? message.content
})
})),
seed: params?.seed ?? $settings?.params?.seed ?? undefined,
stop:
(params?.stop ?? $settings?.params?.stop ?? undefined)
? (params?.stop.split(',').map((token) => token.trim()) ?? $settings.params.stop).map(
(str) => decodeURIComponent(JSON.parse('"' + str.replace(/\"/g, '\\"') + '"'))
)
: undefined,
temperature: params?.temperature ?? $settings?.params?.temperature ?? undefined,
top_p: params?.top_p ?? $settings?.params?.top_p ?? undefined,
frequency_penalty:
params?.frequency_penalty ?? $settings?.params?.frequency_penalty ?? undefined,
max_tokens: params?.max_tokens ?? $settings?.params?.max_tokens ?? undefined,
// params: {
// ...$settings?.params,
// ...params,
// format: $settings.requestFormat ?? undefined,
// keep_alive: $settings.keepAlive ?? undefined,
// stop:
// (params?.stop ?? $settings?.params?.stop ?? undefined)
// ? (
// params?.stop.split(',').map((token) => token.trim()) ?? $settings.params.stop
// ).map((str) =>
// decodeURIComponent(JSON.parse('"' + str.replace(/\"/g, '\\"') + '"'))
// )
// : undefined
// },
tool_ids: selectedToolIds.length > 0 ? selectedToolIds : undefined,
files: files.length > 0 ? files : undefined,
session_id: $socket?.id,
chat_id: $chatId,
id: responseMessageId
id: responseMessageId,
...(stream && (model.info?.meta?.capabilities?.usage ?? false)
? {
stream_options: {
include_usage: true
}
}
: {})
},
`${WEBUI_BASE_URL}/api`
);
@ -1636,6 +1637,7 @@
await handleOpenAIError(error, null, model, responseMessage);
break;
}
if (done || stopResponseFlag || _chatId !== $chatId) {
responseMessage.done = true;
history.messages[responseMessageId] = responseMessage;
@ -1648,7 +1650,7 @@
}
if (usage) {
responseMessage.info = { ...usage, openai: true, usage };
responseMessage.usage = usage;
}
if (selectedModelId) {

View File

@ -932,82 +932,45 @@
</Tooltip>
{/if}
{#if message.info}
{#if message.usage}
<Tooltip
content={message.info.openai
? message.info.usage
? `<pre>${sanitizeResponseContent(
JSON.stringify(message.info.usage, null, 2)
.replace(/"([^(")"]+)":/g, '$1:')
.slice(1, -1)
.split('\n')
.map((line) => line.slice(2))
.map((line) => (line.endsWith(',') ? line.slice(0, -1) : line))
.join('\n')
)}</pre>`
: `prompt_tokens: ${message.info.prompt_tokens ?? 'N/A'}<br/>
completion_tokens: ${message.info.completion_tokens ?? 'N/A'}<br/>
total_tokens: ${message.info.total_tokens ?? 'N/A'}`
: `response_token/s: ${
`${
Math.round(
((message.info.eval_count ?? 0) /
((message.info.eval_duration ?? 0) / 1000000000)) *
100
) / 100
} tokens` ?? 'N/A'
}<br/>
prompt_token/s: ${
Math.round(
((message.info.prompt_eval_count ?? 0) /
((message.info.prompt_eval_duration ?? 0) / 1000000000)) *
100
) / 100 ?? 'N/A'
} tokens<br/>
total_duration: ${
Math.round(((message.info.total_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
}ms<br/>
load_duration: ${
Math.round(((message.info.load_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
}ms<br/>
prompt_eval_count: ${message.info.prompt_eval_count ?? 'N/A'}<br/>
prompt_eval_duration: ${
Math.round(((message.info.prompt_eval_duration ?? 0) / 1000000) * 100) / 100 ??
'N/A'
}ms<br/>
eval_count: ${message.info.eval_count ?? 'N/A'}<br/>
eval_duration: ${
Math.round(((message.info.eval_duration ?? 0) / 1000000) * 100) / 100 ?? 'N/A'
}ms<br/>
approximate_total: ${approximateToHumanReadable(message.info.total_duration ?? 0)}`}
placement="top"
content={message.usage
? `<pre>${sanitizeResponseContent(
JSON.stringify(message.usage, null, 2)
.replace(/"([^(")"]+)":/g, '$1:')
.slice(1, -1)
.split('\n')
.map((line) => line.slice(2))
.map((line) => (line.endsWith(',') ? line.slice(0, -1) : line))
.join('\n')
)}</pre>`
: ''}
placement="bottom"
>
<Tooltip content={$i18n.t('Generation Info')} placement="bottom">
<button
class=" {isLastMessage
? 'visible'
: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition whitespace-pre-wrap"
on:click={() => {
console.log(message);
}}
id="info-{message.id}"
<button
class=" {isLastMessage
? 'visible'
: 'invisible group-hover:visible'} p-1.5 hover:bg-black/5 dark:hover:bg-white/5 rounded-lg dark:hover:text-white hover:text-black transition whitespace-pre-wrap"
on:click={() => {
console.log(message);
}}
id="info-{message.id}"
>
<svg
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 24 24"
stroke-width="2.3"
stroke="currentColor"
class="w-4 h-4"
>
<svg
xmlns="http://www.w3.org/2000/svg"
fill="none"
viewBox="0 0 24 24"
stroke-width="2.3"
stroke="currentColor"
class="w-4 h-4"
>
<path
stroke-linecap="round"
stroke-linejoin="round"
d="M11.25 11.25l.041-.02a.75.75 0 011.063.852l-.708 2.836a.75.75 0 001.063.853l.041-.021M21 12a9 9 0 11-18 0 9 9 0 0118 0zm-9-3.75h.008v.008H12V8.25z"
/>
</svg>
</button>
</Tooltip>
<path
stroke-linecap="round"
stroke-linejoin="round"
d="M11.25 11.25l.041-.02a.75.75 0 011.063.852l-.708 2.836a.75.75 0 001.063.853l.041-.021M21 12a9 9 0 11-18 0 9 9 0 0118 0zm-9-3.75h.008v.008H12V8.25z"
/>
</svg>
</button>
</Tooltip>
{/if}