Add more LLMs, title generation prompt adjustment

Phil Szalay 2025-03-18 14:02:01 +01:00
parent 0ff71ebfb0
commit 458485b097
2 changed files with 129 additions and 29 deletions


@@ -1102,21 +1102,20 @@ TITLE_GENERATION_PROMPT_TEMPLATE = PersistentConfig(
 )
 DEFAULT_TITLE_GENERATION_PROMPT_TEMPLATE = """### Task:
-Generate a concise, 3-5 word title with an emoji summarizing the chat history.
+Generate a concise, 3-5 word title without any emojis (this is important) summarizing the chat history.
 ### Guidelines:
 - The title should clearly represent the main theme or subject of the conversation.
-- Use emojis that enhance understanding of the topic, but avoid quotation marks or special formatting.
-- Write the title in the chat's primary language; default to English if multilingual.
+- Write the title in the chat's primary language; default to German if multilingual.
 - Prioritize accuracy over excessive creativity; keep it clear and simple.
 ### Output:
 JSON format: { "title": "your concise title here" }
 ### Examples:
-- { "title": "📉 Stock Market Trends" },
-- { "title": "🍪 Perfect Chocolate Chip Recipe" },
+- { "title": "Stock Market Trends" },
+- { "title": "Perfect Chocolate Chip Recipe" },
 - { "title": "Evolution of Music Streaming" },
 - { "title": "Remote Work Productivity Tips" },
 - { "title": "Artificial Intelligence in Healthcare" },
-- { "title": "🎮 Video Game Development Insights" }
+- { "title": "Video Game Development Insights" }
 ### Chat History:
 <chat_history>
 {{MESSAGES:END:2}}
 </chat_history>"""
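For context, here is a minimal sketch (not this application's actual implementation) of how a template like DEFAULT_TITLE_GENERATION_PROMPT_TEMPLATE is typically used: the {{MESSAGES:END:2}} placeholder is filled with the last two chat messages, the rendered prompt is sent to an LLM, and the JSON title is parsed from the response. The helper names below are illustrative assumptions, not functions from this codebase.

import json

# Hypothetical helper: render the title-generation prompt shown above.
# Assumes `messages` is a list of {"role": ..., "content": ...} chat messages.
def render_title_prompt(template: str, messages: list[dict]) -> str:
    # {{MESSAGES:END:2}} stands for the last two messages of the chat history.
    last_two = "\n".join(f"{m['role']}: {m['content']}" for m in messages[-2:])
    return template.replace("{{MESSAGES:END:2}}", last_two)

# Hypothetical helper: extract the title from the model's JSON output.
def parse_title(llm_output: str) -> str:
    # The prompt asks for JSON like {"title": "..."}; fall back to the raw text if parsing fails.
    try:
        data = json.loads(llm_output)
        if isinstance(data, dict) and "title" in data:
            return str(data["title"]).strip()
    except json.JSONDecodeError:
        pass
    return llm_output.strip()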


@@ -13,51 +13,152 @@ model_list:
     model_info:
       description: "OpenAI's most advanced model, ideal for complex tasks."
       arena_elo: 1287
-      knowledge_cutoff: "October 2023"
       context_window: 128000
-  - model_name: "GPT 3.5 Turbo"
+  - model_name: "GPT 4o-mini"
     litellm_params:
-      model: "azure/gpt-35-turbo"
+      model: "azure/gpt-4o-mini"
       api_base: os.environ/AZURE_OPENAI_API_BASE_URL
       api_key: os.environ/AZURE_OPENAI_API_KEY
       max_tokens: 800
     model_info:
-      description: "OpenAI's cheaper model for less complex tasks."
-      arena_elo: 1116
-      knowledge_cutoff: "September 2021"
-      context_window: 16000
+      description: "OpenAI's most advanced model, ideal for complex tasks."
+      arena_elo: 1287
+      context_window: 128000
-  - model_name: 'Gemini 1.5 Pro'
-    litellm_params:
-      model: 'gemini-1.5-pro'
-      vertex_project: 'beyond-chat-1111'
-      vertex_location: 'europe-west3'
-      max_tokens: 800
-    model_info:
-      description: "Google's leading model with a very large context window."
-      arena_elo: 1265
-      knowledge_cutoff: 'November 2023'
-      context_window: 2_000_000
-  - model_name: 'Gemini 1.5 Flash'
-    litellm_params:
-      max_tokens: 800
-      model: 'gemini-1.5-flash-001'
-      vertex_project: 'beyond-chat-1111'
-      vertex_location: 'europe-west3'
-    model_info:
-      description: 'Optimized for speed and efficiency.'
-      arena_elo: 1231
-      knowledge_cutoff: 'November 2023'
-      context_window: 1_000_000
-  - model_name: 'Claude 3.5 Sonnet v2'
-    litellm_params:
-      model: 'vertex_ai/claude-3-5-sonnet-v2@20241022'
-      vertex_project: 'beyond-chat-1111'
-      vertex_location: 'europe-west1'
-    model_info:
-      description: 'Lightning-fast responses for simple queries.'
-      arena_elo: 1178
-      knowledge_cutoff: 'February 2024'
-      context_window: 200_000
-      max_tokens: 800
+  - model_name: "GPT o3-mini"
+    litellm_params:
+      model: "azure/o3-mini"
+      api_base: os.environ/AZURE_OPENAI_API_BASE_URL
+      api_key: os.environ/AZURE_OPENAI_API_KEY
+      api_version: "2024-12-01-preview"
+      max_tokens: 800
+    model_info:
+      description: "OpenAI's most advanced model, ideal for complex tasks."
+      arena_elo: 1287
+      context_window: 128000
+  - model_name: "GPT o1"
+    litellm_params:
+      model: "azure/o1"
+      api_base: os.environ/AZURE_OPENAI_API_BASE_URL
+      api_key: os.environ/AZURE_OPENAI_API_KEY
+      api_version: "2024-12-01-preview"
+      max_tokens: 800
+    model_info:
+      description: "OpenAI's most advanced model, ideal for complex tasks."
+      arena_elo: 1287
+      context_window: 128000
+  - model_name: 'Mistral Large 2'
+    litellm_params:
+      model: 'vertex_ai/mistral-large-2411@001'
+      vertex_project: 'beyond-chat-1111'
+      vertex_location: 'europe-west4'
+      max_tokens: 800
+    model_info:
+      description: "Mistral Large (24.11) is the latest version of the Mistral Large model now with improved reasoning and function calling capabilities."
+      arena_elo: 1265
+      context_window: 2000000
+  - model_name: 'Gemini 2.0 Flash'
+    litellm_params:
+      model: 'gemini-2.0-flash-001'
+      vertex_project: 'beyond-chat-1111'
+      vertex_location: 'europe-west4'
+      max_tokens: 800
+    model_info:
+      description: "Workhorse model for all daily tasks. Strong overall performance and low latency supports real-time Live API."
+      arena_elo: 1265
+      context_window: 2000000
+  - model_name: 'Llama 3.1'
+    litellm_params:
+      model: 'vertex_ai/llama-3.1-405b-instruct-maas'
+      vertex_project: 'beyond-chat-1111'
+      vertex_location: 'europe-west3'
+      max_tokens: 800
+    model_info:
+      description: "Workhorse model for all daily tasks. Strong overall performance and low latency supports real-time Live API."
+      arena_elo: 1265
+      context_window: 2000000
+  - model_name: 'Gemini 2.0 Flash Thinking'
+    litellm_params:
+      max_tokens: 800
+      model: 'gemini-2.0-flash-thinking-exp-01-21'
+      vertex_project: 'beyond-chat-1111'
+      vertex_location: 'us-central1'
+    model_info:
+      description: 'An experimental model that exposes its thoughts so you can see its reasoning and how it reached its conclusions.'
+      arena_elo: 1231
+      context_window: 1000000
+  - model_name: 'Claude 3.5 Haiku'
+    litellm_params:
+      model: 'vertex_ai/claude-3-5-haiku@20241022'
+      vertex_project: 'beyond-chat-1111'
+      vertex_location: 'us-east5'
+    model_info:
+      description: 'Claude 3.5 Haiku, Anthropics fastest and most cost-effective model, excels at use cases like code and test case generation, sub-agents, and user-facing chatbots.'
+      arena_elo: 1178
+      context_window: 200000
+      max_tokens: 800
+  - model_name: 'Claude 3.5 Sonnet'
+    litellm_params:
+      model: 'vertex_ai/claude-3-5-sonnet-v2@20241022'
+      vertex_project: 'beyond-chat-1111'
+      vertex_location: 'europe-west1'
+    model_info:
+      description: 'The upgraded Claude 3.5 Sonnet is now state-of-the-art for a variety of tasks including real-world software engineering, enhanced agentic capabilities, and computer use.'
+      arena_elo: 1178
+      context_window: 200000
+      max_tokens: 800
+  - model_name: 'Claude 3.7 Sonnet'
+    litellm_params:
+      model: 'anthropic/claude-3-7-sonnet-20250219'
+      api_key: os.environ/ANTHROPIC_API_KEY
+    model_info:
+      description: ''
+      arena_elo: 1178
+      context_window: 200000
+      max_tokens: 800
+  - model_name: 'Pixtral Large'
+    litellm_params:
+      model: 'mistral/pixtral-large-2411'
+      api_key: os.environ/MISTRAL_API_KEY
+    model_info:
+      description: ''
+      arena_elo: 1178
+      context_window: 200000
+      max_tokens: 800
+  - model_name: 'Perplexity Sonar'
+    litellm_params:
+      model: 'perplexity/sonar'
+      api_key: os.environ/PERPLEXITYAI_API_KEY
+    model_info:
+      description: ''
+      arena_elo: 1178
+      context_window: 200000
+      max_tokens: 800
+  - model_name: 'Perplexity Sonar Pro'
+    litellm_params:
+      model: 'perplexity/sonar-pro'
+      api_key: os.environ/PERPLEXITYAI_API_KEY
+    model_info:
+      description: ''
+      arena_elo: 1178
+      context_window: 200000
+      max_tokens: 800
+  - model_name: 'Perplexity Sonar Reasoning Pro'
+    litellm_params:
+      model: 'perplexity/sonar-reasoning-pro'
+      api_key: os.environ/PERPLEXITYAI_API_KEY
+    model_info:
+      description: ''
+      arena_elo: 1178
+      context_window: 200000
+      max_tokens: 800
+  - model_name: 'Perplexity Sonar Deep Research'
+    litellm_params:
+      model: 'perplexity/sonar-deep-research'
+      api_key: os.environ/PERPLEXITYAI_API_KEY
+    model_info:
+      description: ''
+      arena_elo: 1178
+      context_window: 200000
+      max_tokens: 800
 general_settings: {}
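As a usage note, LiteLLM reads a model_list like this when its proxy is started (for example with `litellm --config config.yaml`), and every model_name becomes a routable alias behind an OpenAI-compatible endpoint. Below is a minimal sketch of calling one of the newly added entries through that proxy; it is not part of this commit, the base URL assumes the proxy's default port 4000, and the api_key placeholder assumes no master key is configured.

from openai import OpenAI

# Point the OpenAI client at the LiteLLM proxy that loaded the config above.
# http://localhost:4000 is the proxy's default address; adjust for your deployment.
client = OpenAI(base_url="http://localhost:4000/v1", api_key="sk-anything")

# "GPT 4o-mini" is one of the model_name aliases added in this commit;
# the proxy routes it to azure/gpt-4o-mini using the configured credentials.
response = client.chat.completions.create(
    model="GPT 4o-mini",
    messages=[{"role": "user", "content": "Give me a one-line summary of LiteLLM."}],
    max_tokens=100,
)
print(response.choices[0].message.content)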