Merge pull request #1630 from cheahjs/feat/split-large-chunks

feat: split large openai responses into smaller chunks
This commit is contained in:
Timothy Jaeryang Baek 2024-04-22 13:56:26 -07:00 committed by GitHub
commit 2d7d6cfffc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 121 additions and 52 deletions

View File

@ -0,0 +1,70 @@
type TextStreamUpdate = {
done: boolean;
value: string;
};
// createOpenAITextStream takes a ReadableStreamDefaultReader from an SSE response,
// and returns an async generator that emits delta updates with large deltas chunked into random sized chunks
export async function createOpenAITextStream(
messageStream: ReadableStreamDefaultReader,
splitLargeDeltas: boolean
): Promise<AsyncGenerator<TextStreamUpdate>> {
let iterator = openAIStreamToIterator(messageStream);
if (splitLargeDeltas) {
iterator = streamLargeDeltasAsRandomChunks(iterator);
}
return iterator;
}
async function* openAIStreamToIterator(
reader: ReadableStreamDefaultReader
): AsyncGenerator<TextStreamUpdate> {
while (true) {
const { value, done } = await reader.read();
if (done) {
yield { done: true, value: '' };
break;
}
const lines = value.split('\n');
for (const line of lines) {
if (line !== '') {
console.log(line);
if (line === 'data: [DONE]') {
yield { done: true, value: '' };
} else {
const data = JSON.parse(line.replace(/^data: /, ''));
console.log(data);
yield { done: false, value: data.choices[0].delta.content ?? '' };
}
}
}
}
}
// streamLargeDeltasAsRandomChunks will chunk large deltas (length > 5) into random sized chunks between 1-3 characters
// This is to simulate a more fluid streaming, even though some providers may send large chunks of text at once
async function* streamLargeDeltasAsRandomChunks(
iterator: AsyncGenerator<TextStreamUpdate>
): AsyncGenerator<TextStreamUpdate> {
for await (const textStreamUpdate of iterator) {
if (textStreamUpdate.done) {
yield textStreamUpdate;
return;
}
let content = textStreamUpdate.value;
if (content.length < 5) {
yield { done: false, value: content };
continue;
}
while (content != '') {
const chunkSize = Math.min(Math.floor(Math.random() * 3) + 1, content.length);
const chunk = content.slice(0, chunkSize);
yield { done: false, value: chunk };
await sleep(5);
content = content.slice(chunkSize);
}
}
}
const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));

View File

@ -17,11 +17,17 @@
let titleAutoGenerateModelExternal = '';
let fullScreenMode = false;
let titleGenerationPrompt = '';
let splitLargeChunks = false;
// Interface
let promptSuggestions = [];
let showUsername = false;
const toggleSplitLargeChunks = async () => {
splitLargeChunks = !splitLargeChunks;
saveSettings({ splitLargeChunks: splitLargeChunks });
};
const toggleFullScreenMode = async () => {
fullScreenMode = !fullScreenMode;
saveSettings({ fullScreenMode: fullScreenMode });
@ -197,6 +203,28 @@
</button>
</div>
</div>
<div>
<div class=" py-0.5 flex w-full justify-between">
<div class=" self-center text-xs font-medium">
{$i18n.t('Fluidly stream large external response chunks')}
</div>
<button
class="p-1 px-3 text-xs flex rounded transition"
on:click={() => {
toggleSplitLargeChunks();
}}
type="button"
>
{#if splitLargeChunks === true}
<span class="ml-2 self-center">{$i18n.t('On')}</span>
{:else}
<span class="ml-2 self-center">{$i18n.t('Off')}</span>
{/if}
</button>
</div>
</div>
</div>
<hr class=" dark:border-gray-700" />

View File

@ -152,6 +152,7 @@
"File Mode": "",
"File not found.": "",
"Fingerprint spoofing detected: Unable to use initials as avatar. Defaulting to default profile image.": "",
"Fluidly stream large external response chunks": "",
"Focus chat input": "",
"Format your variables using square brackets like this:": "",
"From (Base Model)": "",

View File

@ -39,6 +39,7 @@
import { RAGTemplate } from '$lib/utils/rag';
import { LITELLM_API_BASE_URL, OLLAMA_API_BASE_URL, OPENAI_API_BASE_URL } from '$lib/constants';
import { WEBUI_BASE_URL } from '$lib/constants';
import { createOpenAITextStream } from '$lib/apis/streaming';
const i18n = getContext('i18n');
@ -599,38 +600,22 @@
.pipeThrough(splitStream('\n'))
.getReader();
while (true) {
const { value, done } = await reader.read();
const textStream = await createOpenAITextStream(reader, $settings.splitLargeChunks);
console.log(textStream);
for await (const update of textStream) {
const { value, done } = update;
if (done || stopResponseFlag || _chatId !== $chatId) {
responseMessage.done = true;
messages = messages;
break;
}
try {
let lines = value.split('\n');
for (const line of lines) {
if (line !== '') {
console.log(line);
if (line === 'data: [DONE]') {
responseMessage.done = true;
messages = messages;
} else {
let data = JSON.parse(line.replace(/^data: /, ''));
console.log(data);
if (responseMessage.content == '' && data.choices[0].delta.content == '\n') {
continue;
} else {
responseMessage.content += data.choices[0].delta.content ?? '';
messages = messages;
}
}
}
}
} catch (error) {
console.log(error);
if (responseMessage.content == '' && value == '\n') {
continue;
} else {
responseMessage.content += value;
messages = messages;
}
if ($settings.notificationEnabled && !document.hasFocus()) {

View File

@ -42,6 +42,7 @@
OLLAMA_API_BASE_URL,
WEBUI_BASE_URL
} from '$lib/constants';
import { createOpenAITextStream } from '$lib/apis/streaming';
const i18n = getContext('i18n');
@ -611,38 +612,22 @@
.pipeThrough(splitStream('\n'))
.getReader();
while (true) {
const { value, done } = await reader.read();
const textStream = await createOpenAITextStream(reader, $settings.splitLargeChunks);
console.log(textStream);
for await (const update of textStream) {
const { value, done } = update;
if (done || stopResponseFlag || _chatId !== $chatId) {
responseMessage.done = true;
messages = messages;
break;
}
try {
let lines = value.split('\n');
for (const line of lines) {
if (line !== '') {
console.log(line);
if (line === 'data: [DONE]') {
responseMessage.done = true;
messages = messages;
} else {
let data = JSON.parse(line.replace(/^data: /, ''));
console.log(data);
if (responseMessage.content == '' && data.choices[0].delta.content == '\n') {
continue;
} else {
responseMessage.content += data.choices[0].delta.content ?? '';
messages = messages;
}
}
}
}
} catch (error) {
console.log(error);
if (responseMessage.content == '' && value == '\n') {
continue;
} else {
responseMessage.content += value;
messages = messages;
}
if ($settings.notificationEnabled && !document.hasFocus()) {