Created DEFAULT_NUM_CTX variable with a default of 32768

Aaron Bolton 2024-11-18 20:48:35 +00:00
parent 233d22e080
commit bfaaf86c69
2 changed files with 12 additions and 1 deletion


@@ -56,3 +56,10 @@ XAI_API_KEY=
# Include this environment variable if you want more logging for debugging locally
VITE_LOG_LEVEL=debug
# Example Context Values for qwen2.5-coder:32b
#
# DEFAULT_NUM_CTX=32768 # Consumes 36GB of VRAM
# DEFAULT_NUM_CTX=24576 # Consumes 32GB of VRAM
# DEFAULT_NUM_CTX=12288 # Consumes 26GB of VRAM
# DEFAULT_NUM_CTX=6144 # Consumes 24GB of VRAM


@@ -8,6 +8,10 @@ import { ollama } from 'ollama-ai-provider';
import { createOpenRouter } from "@openrouter/ai-sdk-provider";
import { createMistral } from '@ai-sdk/mistral';
export const DEFAULT_NUM_CTX = process.env.DEFAULT_NUM_CTX ?
  parseInt(process.env.DEFAULT_NUM_CTX, 10) :
  32768;
export function getAnthropicModel(apiKey: string, model: string) {
  const anthropic = createAnthropic({
    apiKey,
@@ -58,7 +62,7 @@ export function getGroqModel(apiKey: string, model: string) {
export function getOllamaModel(baseURL: string, model: string) {
  let Ollama = ollama(model, {
-    numCtx: 32768,
+    numCtx: DEFAULT_NUM_CTX,
  });
  Ollama.config.baseURL = `${baseURL}/api`;
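
Taken together, the change lets the Ollama context window be tuned from the environment. Below is a minimal sketch of how the new constant flows into the provider, reusing the ollama-ai-provider import shown in the diff above; the model name and the 24576 override are illustrative assumptions, not part of the commit.

import { ollama } from 'ollama-ai-provider';

// Falls back to 32768 tokens when DEFAULT_NUM_CTX is not set in the environment.
export const DEFAULT_NUM_CTX = process.env.DEFAULT_NUM_CTX ?
  parseInt(process.env.DEFAULT_NUM_CTX, 10) :
  32768;

// With e.g. DEFAULT_NUM_CTX=24576 in .env, the provider is created with a
// 24576-token context window; otherwise the 32768 default applies.
const model = ollama('qwen2.5-coder:32b', { numCtx: DEFAULT_NUM_CTX });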