mirror of
https://github.com/stackblitz/bolt.new
synced 2025-06-26 18:17:50 +00:00
285 lines
12 KiB
TypeScript
285 lines
12 KiB
TypeScript
import { type ActionFunctionArgs } from '@remix-run/cloudflare';
|
|
import { MAX_RESPONSE_SEGMENTS, MAX_TOKENS } from '~/lib/.server/llm/constants';
|
|
import { CONTINUE_PROMPT, API_CHATBOT_PROMPT, INJECTED_PROMPT_1 , INJECTED_PROMPT_2 } from '~/lib/.server/llm/prompts';
|
|
import { streamText, type Messages, type StreamingOptions } from '~/lib/.server/llm/stream-text';
|
|
import SwitchableStream from '~/lib/.server/llm/switchable-stream';
|
|
import { streamText as _streamText, convertToCoreMessages } from 'ai';
|
|
import { getAPIKey } from '~/lib/.server/llm/api-key';
|
|
import { getAnthropicModel } from '~/lib/.server/llm/model';
|
|
|
|
const estimateTokens = (text: string): number => {
|
|
// Rough estimation: ~4 characters per token for English text
|
|
return Math.ceil((text || '').length / 4);
|
|
};
|
|
|
|
const manageContextWindow = (messages: Messages, maxTokens: number = 150000): Messages => {
|
|
// Calculate total tokens in current conversation
|
|
let totalTokens = messages.reduce((sum, msg) => {
|
|
return sum + estimateTokens(msg.content || '');
|
|
}, 0);
|
|
|
|
console.log(`Total tokens before management: ${totalTokens}`);
|
|
|
|
// If we're under the limit, return messages as-is
|
|
if (totalTokens <= maxTokens) {
|
|
return messages;
|
|
}
|
|
|
|
// Create a copy to avoid mutating the original
|
|
const managedMessages = [...messages];
|
|
|
|
// Always keep the first message (system context) and last few messages
|
|
const keepRecentCount = 6; // Keep last 6 messages for context
|
|
|
|
// Remove messages from the middle until we're under the token limit
|
|
while (totalTokens > maxTokens && managedMessages.length > keepRecentCount + 1) {
|
|
// Find the oldest non-system message to remove
|
|
let removeIndex = 1;
|
|
|
|
// Skip any critical messages at the beginning
|
|
while (removeIndex < managedMessages.length - keepRecentCount) {
|
|
const msg = managedMessages[removeIndex];
|
|
|
|
// Don't remove injected prompts or transition markers
|
|
if (msg.role === 'user' && (
|
|
msg.content.includes('[INJECTED_PROMPT_1]') ||
|
|
msg.content.includes('[INJECTED_PROMPT_2]')
|
|
)) {
|
|
removeIndex++;
|
|
continue;
|
|
}
|
|
|
|
if (msg.role === 'assistant' && msg.content.includes('[final]')) {
|
|
removeIndex++;
|
|
continue;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
if (removeIndex < managedMessages.length - keepRecentCount) {
|
|
const removedMessage = managedMessages.splice(removeIndex, 1)[0];
|
|
totalTokens -= estimateTokens(removedMessage.content || '');
|
|
console.log(`Removed message, tokens now: ${totalTokens}`);
|
|
} else {
|
|
break; // Safety break if we can't find anything to remove
|
|
}
|
|
}
|
|
|
|
console.log(`Context managed: ${messages.length - managedMessages.length} messages removed`);
|
|
console.log(`Final managed messages count: ${managedMessages.length}, tokens: ${totalTokens}`);
|
|
|
|
return managedMessages;
|
|
};
|
|
|
|
export async function action(args: ActionFunctionArgs) {
|
|
return chatAction(args);
|
|
}
|
|
|
|
async function chatAction({ context, request }: ActionFunctionArgs) {
|
|
const { messages } = await request.json<{ messages: Messages }>();
|
|
|
|
// NEW: Also we changed "messages" to "managedMessages" after this
|
|
const managedMessages = manageContextWindow(messages, 180000);
|
|
|
|
const stream = new SwitchableStream();
|
|
|
|
try {
|
|
// Check if we've already transitioned to the original agent
|
|
const hasTransitioned = checkIfAlreadyTransitioned(managedMessages);
|
|
|
|
if (!hasTransitioned) {
|
|
// Use your agent first
|
|
console.log('Using your agent...');
|
|
|
|
// Create options with proper stream closing and transition detection
|
|
const yourAgentOptions: StreamingOptions = {
|
|
onFinish: async ({ text: content, finishReason }: { text: string; finishReason: string }) => {
|
|
console.log('Your agent finished with reason:', finishReason);
|
|
|
|
// Check if we should transition to original agent
|
|
if (checkIfShouldTransition(content)) {
|
|
console.log('Transition detected! Immediately injecting first prompt...');
|
|
|
|
// Add the assistant's response to messages
|
|
const updatedMessages: Messages = [...managedMessages, { role: 'assistant' as const, content }];
|
|
|
|
// Inject the first prompt immediately
|
|
const injectedMessages = injectSinglePrompt(updatedMessages, 1);
|
|
|
|
// Continue with original agent using injected prompt
|
|
const originalAgentOptions: StreamingOptions = {
|
|
toolChoice: 'none',
|
|
onFinish: async ({ text: responseContent, finishReason: responseFinishReason }: { text: string; finishReason: string }) => {
|
|
if (responseFinishReason !== 'length') {
|
|
// After first prompt response, inject second prompt immediately
|
|
console.log('First prompt response complete, injecting second prompt...');
|
|
|
|
const messagesWithFirstResponse: Messages = [...injectedMessages, { role: 'assistant' as const, content: responseContent }];
|
|
const secondInjectedMessages = injectSinglePrompt(messagesWithFirstResponse, 2);
|
|
|
|
// Continue with second prompt
|
|
const secondPromptOptions: StreamingOptions = {
|
|
toolChoice: 'none',
|
|
onFinish: async ({ text: finalContent, finishReason: finalFinishReason }: { text: string; finishReason: string }) => {
|
|
if (finalFinishReason !== 'length') {
|
|
return stream.close();
|
|
}
|
|
// Handle continuation for second prompt if needed
|
|
if (stream.switches >= MAX_RESPONSE_SEGMENTS) {
|
|
throw Error('Cannot continue message: Maximum segments reached');
|
|
}
|
|
secondInjectedMessages.push({ role: 'assistant' as const, content: finalContent });
|
|
secondInjectedMessages.push({ role: 'user' as const, content: CONTINUE_PROMPT });
|
|
const result = await streamText(secondInjectedMessages, context.cloudflare.env, secondPromptOptions);
|
|
return stream.switchSource(result.toAIStream());
|
|
},
|
|
};
|
|
|
|
const secondResult = await streamText(secondInjectedMessages, context.cloudflare.env, secondPromptOptions);
|
|
return stream.switchSource(secondResult.toAIStream());
|
|
}
|
|
|
|
// Handle continuation for first prompt if needed
|
|
if (stream.switches >= MAX_RESPONSE_SEGMENTS) {
|
|
throw Error('Cannot continue message: Maximum segments reached');
|
|
}
|
|
injectedMessages.push({ role: 'assistant' as const, content: responseContent });
|
|
injectedMessages.push({ role: 'user' as const, content: CONTINUE_PROMPT });
|
|
const result = await streamText(injectedMessages, context.cloudflare.env, originalAgentOptions);
|
|
return stream.switchSource(result.toAIStream());
|
|
},
|
|
};
|
|
|
|
const originalResult = await streamText(injectedMessages, context.cloudflare.env, originalAgentOptions);
|
|
return stream.switchSource(originalResult.toAIStream());
|
|
}
|
|
|
|
// No transition - close normally
|
|
if (finishReason !== 'length') {
|
|
console.log('Closing stream - your agent finished without transition');
|
|
return stream.close();
|
|
}
|
|
|
|
// Handle continuation for your agent
|
|
if (stream.switches >= MAX_RESPONSE_SEGMENTS) {
|
|
throw Error('Cannot continue message: Maximum segments reached');
|
|
}
|
|
const switchesLeft = MAX_RESPONSE_SEGMENTS - stream.switches;
|
|
console.log(`Reached max token limit (${MAX_TOKENS}): Continuing message (${switchesLeft} switches left)`);
|
|
managedMessages.push({ role: 'assistant' as const, content });
|
|
managedMessages.push({ role: 'user' as const, content: CONTINUE_PROMPT });
|
|
const result = await streamTextWithYourAgent(managedMessages, context.cloudflare.env, yourAgentOptions);
|
|
return stream.switchSource(result.toAIStream());
|
|
},
|
|
};
|
|
|
|
const result = await streamTextWithYourAgent(managedMessages, context.cloudflare.env, yourAgentOptions);
|
|
stream.switchSource(result.toAIStream());
|
|
|
|
} else {
|
|
// We've already transitioned - normal original agent flow
|
|
console.log('Using original agent (already transitioned)...');
|
|
const options: StreamingOptions = {
|
|
toolChoice: 'none',
|
|
onFinish: async ({ text: content, finishReason }: { text: string; finishReason: string }) => {
|
|
if (finishReason !== 'length') {
|
|
return stream.close();
|
|
}
|
|
if (stream.switches >= MAX_RESPONSE_SEGMENTS) {
|
|
throw Error('Cannot continue message: Maximum segments reached');
|
|
}
|
|
const switchesLeft = MAX_RESPONSE_SEGMENTS - stream.switches;
|
|
console.log(`Reached max token limit (${MAX_TOKENS}): Continuing message (${switchesLeft} switches left)`);
|
|
managedMessages.push({ role: 'assistant' as const, content });
|
|
managedMessages.push({ role: 'user' as const, content: CONTINUE_PROMPT });
|
|
const result = await streamText(managedMessages, context.cloudflare.env, options);
|
|
return stream.switchSource(result.toAIStream());
|
|
},
|
|
};
|
|
|
|
const result = await streamText(managedMessages, context.cloudflare.env, options);
|
|
stream.switchSource(result.toAIStream());
|
|
}
|
|
|
|
return new Response(stream.readable, {
|
|
status: 200,
|
|
headers: {
|
|
contentType: 'text/plain; charset=utf-8',
|
|
},
|
|
});
|
|
|
|
} catch (error) {
|
|
console.log(error);
|
|
throw new Response(null, {
|
|
status: 500,
|
|
statusText: 'Internal Server Error',
|
|
});
|
|
}
|
|
}
|
|
|
|
// Simplified helper functions since we're handling injection inline now
|
|
function streamTextWithYourAgent(messages: Messages, env: Env, options?: StreamingOptions) {
|
|
return _streamText({
|
|
model: getAnthropicModel(getAPIKey(env)),
|
|
system: getYourAgentSystemPrompt(),
|
|
maxTokens: MAX_TOKENS,
|
|
headers: {
|
|
'anthropic-beta': 'max-tokens-3-5-sonnet-2024-07-15',
|
|
},
|
|
messages: convertToCoreMessages(messages),
|
|
...options,
|
|
});
|
|
}
|
|
|
|
function getYourAgentSystemPrompt(): string {
|
|
// return API_CHATBOT_PROMPT;
|
|
return API_CHATBOT_PROMPT;
|
|
}
|
|
|
|
function checkIfAlreadyTransitioned(messages: Messages): boolean {
|
|
// Check if any assistant message contains [final] AND we have injected prompts after it
|
|
const hasTransitionMarker = messages.some(msg =>
|
|
msg.role === 'assistant' && msg.content.includes('[final]')
|
|
);
|
|
|
|
// If no transition marker, definitely not transitioned
|
|
if (!hasTransitionMarker) {
|
|
return false;
|
|
}
|
|
|
|
// Check if we have injected prompts (meaning we're in post-transition phase)
|
|
const hasInjectedPrompts = messages.some(msg =>
|
|
msg.role === 'user' && (
|
|
msg.content.includes('[INJECTED_PROMPT_1]') ||
|
|
msg.content.includes('[INJECTED_PROMPT_2]')
|
|
)
|
|
);
|
|
|
|
return hasInjectedPrompts;
|
|
}
|
|
|
|
function checkIfShouldTransition(responseText: string): boolean {
|
|
return responseText.includes('[final]');
|
|
}
|
|
|
|
function injectSinglePrompt(messages: Messages, promptNumber: 1 | 2): Messages {
|
|
const injectedMessages = [...messages];
|
|
console.log(`Injecting prompt ${promptNumber} into messages`);
|
|
|
|
if (promptNumber === 1) {
|
|
injectedMessages.push({
|
|
role: 'user' as const,
|
|
// content: INJECTED_PROMPT_1 //'[INJECTED_PROMPT_1] Please review the API spec and be absolutely sure that you are calling those functions with the appropriate data formats, for example ensuring that you are sending object_name values, encapsulating input correctly in json, and using the exact function endpoints as they were defined.'
|
|
content: INJECTED_PROMPT_1 //
|
|
});
|
|
} else {
|
|
injectedMessages.push({
|
|
role: 'user' as const,
|
|
content: INJECTED_PROMPT_2
|
|
});
|
|
}
|
|
|
|
return injectedMessages;
|
|
}
|