diff --git a/packages/bolt/app/lib/.server/llm/constants.ts b/packages/bolt/app/lib/.server/llm/constants.ts
index bfe772e..b24acdf 100644
--- a/packages/bolt/app/lib/.server/llm/constants.ts
+++ b/packages/bolt/app/lib/.server/llm/constants.ts
@@ -1,2 +1,5 @@
 // see https://docs.anthropic.com/en/docs/about-claude/models
 export const MAX_TOKENS = 8192;
+
+// limits the number of model responses that can be returned in a single request
+export const MAX_RESPONSE_SEGMENTS = 2;
diff --git a/packages/bolt/app/lib/.server/llm/prompts.ts b/packages/bolt/app/lib/.server/llm/prompts.ts
index f811376..e691066 100644
--- a/packages/bolt/app/lib/.server/llm/prompts.ts
+++ b/packages/bolt/app/lib/.server/llm/prompts.ts
@@ -1,4 +1,5 @@
 import { WORK_DIR } from '../../../utils/constants';
+import { stripIndents } from '../../../utils/stripIndent';
 
 export const getSystemPrompt = (cwd: string = WORK_DIR) => `
 You are Bolt, an expert AI assistant and exceptional senior software developer with vast knowledge across multiple programming languages, frameworks, and best practices.
@@ -198,3 +199,8 @@ Here are some examples of correct usage of artifacts:
 `;
+
+export const CONTINUE_PROMPT = stripIndents`
+  Continue your prior response. IMPORTANT: Immediately begin from where you left off without any interruptions.
+  Do not repeat any content, including artifact and action tags.
+`;
diff --git a/packages/bolt/app/lib/.server/llm/switchable-stream.ts b/packages/bolt/app/lib/.server/llm/switchable-stream.ts
new file mode 100644
index 0000000..bedc883
--- /dev/null
+++ b/packages/bolt/app/lib/.server/llm/switchable-stream.ts
@@ -0,0 +1,65 @@
+export default class SwitchableStream extends TransformStream {
+  private _controller: TransformStreamDefaultController | null = null;
+  private _currentReader: ReadableStreamDefaultReader | null = null;
+  private _switches = 0;
+
+  constructor() {
+    let controllerRef: TransformStreamDefaultController | undefined;
+
+    super({
+      start(controller) {
+        controllerRef = controller;
+      },
+    });
+
+    if (controllerRef === undefined) {
+      throw new Error('Controller not properly initialized');
+    }
+
+    this._controller = controllerRef;
+  }
+
+  async switchSource(newStream: ReadableStream) {
+    if (this._currentReader) {
+      await this._currentReader.cancel();
+    }
+
+    this._currentReader = newStream.getReader();
+
+    this._pumpStream();
+
+    this._switches++;
+  }
+
+  private async _pumpStream() {
+    if (!this._currentReader || !this._controller) {
+      throw new Error('Stream is not properly initialized');
+    }
+
+    try {
+      while (true) {
+        const { done, value } = await this._currentReader.read();
+
+        if (done) {
+          break;
+        }
+
+        this._controller.enqueue(value);
+      }
+    } catch (error) {
+      this._controller.error(error);
+    }
+  }
+
+  close() {
+    if (this._currentReader) {
+      this._currentReader.cancel();
+    }
+
+    this._controller?.terminate();
+  }
+
+  get switches() {
+    return this._switches;
+  }
+}
diff --git a/packages/bolt/app/routes/api.chat.ts b/packages/bolt/app/routes/api.chat.ts
index 3974b6e..b75d4e5 100644
--- a/packages/bolt/app/routes/api.chat.ts
+++ b/packages/bolt/app/routes/api.chat.ts
@@ -1,12 +1,40 @@
 import { type ActionFunctionArgs } from '@remix-run/cloudflare';
-import { streamText, type Messages } from '../lib/.server/llm/stream-text';
+import { MAX_RESPONSE_SEGMENTS } from '../lib/.server/llm/constants';
+import { CONTINUE_PROMPT } from '../lib/.server/llm/prompts';
+import { streamText, type Messages, type StreamingOptions } from '../lib/.server/llm/stream-text';
+import SwitchableStream from '../lib/.server/llm/switchable-stream';
+import { StreamingTextResponse } from 'ai';
 
 export async function action({ context, request }: ActionFunctionArgs) {
   const { messages } = await request.json<{ messages: Messages }>();
 
+  const stream = new SwitchableStream();
   try {
-    const result = await streamText(messages, context.cloudflare.env, { toolChoice: 'none' });
-    return result.toAIStreamResponse();
+    const options: StreamingOptions = {
+      toolChoice: 'none',
+      onFinish: async ({ text: content, finishReason }) => {
+        if (finishReason !== 'length') {
+          return stream.close();
+        }
+
+        if (stream.switches >= MAX_RESPONSE_SEGMENTS) {
+          throw Error('Cannot continue message: maximum segments reached');
+        }
+
+        messages.push({ role: 'assistant', content });
+        messages.push({ role: 'user', content: CONTINUE_PROMPT });
+
+        const result = await streamText(messages, context.cloudflare.env, options);
+
+        return stream.switchSource(result.toAIStream());
+      },
+    };
+
+    const result = await streamText(messages, context.cloudflare.env, options);
+
+    stream.switchSource(result.toAIStream());
+
+    return new StreamingTextResponse(stream.readable);
   } catch (error) {
     console.log(error);
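
For reference, here is a minimal, non-streaming sketch of the continuation loop that the updated api.chat.ts implements with streams: a segment that stops with finishReason 'length' is appended to the conversation as an assistant message, CONTINUE_PROMPT is sent as the next user message, and the process is capped at MAX_RESPONSE_SEGMENTS. The generateSegment function is a hypothetical stand-in for a single streamText call; the real route stitches the segments together with SwitchableStream instead of concatenating strings.

// Hypothetical sketch only: `generateSegment` stands in for one model call.
type Message = { role: 'user' | 'assistant'; content: string };

interface SegmentResult {
  text: string;
  finishReason: 'stop' | 'length' | 'other';
}

const MAX_RESPONSE_SEGMENTS = 2;
const CONTINUE_PROMPT =
  'Continue your prior response. IMPORTANT: Immediately begin from where you left off without any interruptions.';

async function completeWithContinuations(
  messages: Message[],
  generateSegment: (messages: Message[]) => Promise<SegmentResult>,
): Promise<string> {
  let full = '';

  for (let segment = 0; ; segment++) {
    const { text, finishReason } = await generateSegment(messages);
    full += text;

    // the model finished on its own, so no continuation is needed
    if (finishReason !== 'length') {
      return full;
    }

    // same guard as the onFinish callback: cap the number of segments
    if (segment + 1 >= MAX_RESPONSE_SEGMENTS) {
      throw new Error('Cannot continue message: maximum segments reached');
    }

    // feed the partial answer back and ask the model to pick up where it left off
    messages.push({ role: 'assistant', content: text });
    messages.push({ role: 'user', content: CONTINUE_PROMPT });
  }
}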