feat: add ollama plugin

2025-02-01 23:23:51 +00:00 · 2024-10-21 11:40:59 +01:00 · 2024-10-21 11:40:59 +01:00 · 58b0fca976
commit 58b0fca976
parent 8ef7e2abef
9 changed files with 346 additions and 0 deletions
--- a/api/src/extensions/plugins/ollama/i18n/en/help.json
+++ b/api/src/extensions/plugins/ollama/i18n/en/help.json
@ -0,0 +1,23 @@
+{
+  "api_url": "URL of the Ollama server.",
+  "model": "Determines which model to run. Make sure the model has been pulled in Ollama before using it.",
+  "keep_alive": "Time to keep the model in memory.",
+  "max_messages_ctx": "Number of messages to include in the context.",
+  "context": "Provide context to the assistant (e.g., You are an AI assistant).",
+  "instructions": "Instructions to give to the assistant.",
+  "fallback_message": "Message to return in case there is an API error.",
+  "mirostat": "Enable Mirostat sampling for controlling perplexity. (Default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)",
+  "mirostat_eta": "Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1)",
+  "mirostat_tau": "Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0)",
+  "num_ctx": "Sets the size of the context window used to generate the next token. (Default: 2048)",
+  "repeat_last_n": "Sets how far back the model looks to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)",
+  "repeat_penalty": "Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)",
+  "temperature": "The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)",
+  "seed": "Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0)",
+  "stop": "Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate `stop` parameters in a modelfile.",
+  "tfs_z": "Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (Default: 1)",
+  "num_predict": "Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)",
+  "top_k": "Reduces the probability of generating nonsense. A higher value (e.g., 100) will give more diverse answers, while a lower value (e.g., 10) will be more conservative. (Default: 40)",
+  "top_p": "Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)",
+  "min_p": "Alternative to top_p that aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0)"
+}
--- a/api/src/extensions/plugins/ollama/i18n/en/label.json
+++ b/api/src/extensions/plugins/ollama/i18n/en/label.json
@ -0,0 +1,23 @@
+{
+  "api_url": "API URL",
+  "model": "Model",
+  "keep_alive": "Keep Alive",
+  "max_messages_ctx": "Max Context Messages",
+  "context": "Context",
+  "instructions": "Instructions",
+  "fallback_message": "Fallback Message",
+  "mirostat": "Mirostat",
+  "mirostat_eta": "Mirostat Eta",
+  "mirostat_tau": "Mirostat Tau",
+  "num_ctx": "Context Window Size",
+  "repeat_last_n": "Repeat Last N",
+  "repeat_penalty": "Repeat Penalty",
+  "temperature": "Temperature",
+  "seed": "Seed",
+  "stop": "Stop",
+  "tfs_z": "TFS Z",
+  "num_predict": "Maximum Number of Tokens",
+  "top_k": "Top K",
+  "top_p": "Top P",
+  "min_p": "Min P"
+}
--- a/api/src/extensions/plugins/ollama/i18n/en/title.json
+++ b/api/src/extensions/plugins/ollama/i18n/en/title.json
@ -0,0 +1,3 @@
+{
+  "ollama": "Ollama"
+}
--- a/api/src/extensions/plugins/ollama/i18n/fr/help.json
+++ b/api/src/extensions/plugins/ollama/i18n/fr/help.json
@ -0,0 +1,23 @@
+{
+  "api_url": "Adresse URL du serveur Ollama.",
+  "model": "Détermine le modèle à utiliser. Assurez-vous d'avoir téléchargé le modèle (pull) dans Ollama pour pouvoir l'utiliser.",
+  "keep_alive": "Temps pendant lequel le modèle reste en mémoire.",
+  "max_messages_ctx": "Nombre maximum de messages à inclure dans le contexte.",
+  "context": "Fournit un contexte à l'assistant (par exemple : Vous êtes un assistant IA).",
+  "instructions": "Instructions à donner à l'assistant.",
+  "fallback_message": "Message à retourner en cas d'erreur API.",
+  "mirostat": "Active l'échantillonnage Mirostat pour contrôler la perplexité. (Par défaut : 0, 0 = désactivé, 1 = Mirostat, 2 = Mirostat 2.0)",
+  "mirostat_eta": "Influence la rapidité de réaction de l'algorithme aux retours du texte généré. Un taux d'apprentissage plus bas entraînera des ajustements plus lents, tandis qu'un taux plus élevé rendra l'algorithme plus réactif. (Par défaut : 0.1)",
+  "mirostat_tau": "Contrôle l'équilibre entre la cohérence et la diversité de la sortie. Une valeur plus basse résulte en un texte plus concentré et cohérent. (Par défaut : 5.0)",
+  "num_ctx": "Définit la taille de la fenêtre de contexte utilisée pour générer le prochain jeton. (Par défaut : 2048)",
+  "repeat_last_n": "Définit jusqu'où le modèle doit regarder en arrière pour éviter la répétition. (Par défaut : 64, 0 = désactivé, -1 = num_ctx)",
+  "repeat_penalty": "Définit la force de la pénalité pour les répétitions. Une valeur plus élevée (par exemple, 1.5) pénalisera plus fortement les répétitions, tandis qu'une valeur plus basse (par exemple, 0.9) sera plus clémente. (Par défaut : 1.1)",
+  "temperature": "La température du modèle. Augmenter la température rendra le modèle plus créatif. (Par défaut : 0.8)",
+  "seed": "Définit la graine de nombre aléatoire à utiliser pour la génération. Fixer cette valeur à un nombre précis permettra au modèle de générer le même texte pour la même invite. (Par défaut : 0)",
+  "stop": "Définit les séquences d'arrêt à utiliser. Lorsque ce motif est rencontré, le modèle cessera de générer du texte et retournera. Plusieurs motifs d'arrêt peuvent être définis en spécifiant plusieurs paramètres `stop` séparés dans un fichier de modèle.",
+  "tfs_z": "L'échantillonnage sans queue est utilisé pour réduire l'impact des jetons moins probables dans la sortie. Une valeur plus élevée (par exemple, 2.0) réduira davantage l'impact, tandis qu'une valeur de 1.0 désactive ce paramètre. (Par défaut : 1)",
+  "num_predict": "Nombre maximum de jetons à prédire lors de la génération de texte. (Par défaut : 128, -1 = génération infinie, -2 = remplir le contexte)",
+  "top_k": "Réduit la probabilité de générer des non-sens. Une valeur plus élevée (par exemple, 100) donnera des réponses plus diverses, tandis qu'une valeur plus basse (par exemple, 10) sera plus conservatrice. (Par défaut : 40)",
+  "top_p": "Fonctionne conjointement avec top-k. Une valeur plus élevée (par exemple, 0.95) conduira à un texte plus diversifié, tandis qu'une valeur plus basse (par exemple, 0.5) générera un texte plus concentré et conservateur. (Par défaut : 0.9)",
+  "min_p": "Alternative à top_p qui vise à assurer un équilibre entre la qualité et la variété. Le paramètre *p* représente la probabilité minimum pour qu'un jeton soit considéré, par rapport à la probabilité du jeton le plus probable. Par exemple, avec *p* = 0.05 et le jeton le plus probable ayant une probabilité de 0.9, les logits d'une valeur inférieure à 0.045 sont filtrés. (Par défaut : 0.0)"
+}
--- a/api/src/extensions/plugins/ollama/i18n/fr/label.json
+++ b/api/src/extensions/plugins/ollama/i18n/fr/label.json
@ -0,0 +1,23 @@
+{
+  "api_url": "URL de l'API",
+  "model": "Modèle",
+  "keep_alive": "Maintien en Vie",
+  "max_messages_ctx": "Nombre Maximum de Messages",
+  "context": "Contexte",
+  "instructions": "Instructions",
+  "fallback_message": "Message de Secours",
+  "mirostat": "Mirostat",
+  "mirostat_eta": "Mirostat Eta",
+  "mirostat_tau": "Mirostat Tau",
+  "num_ctx": "Taille de la Fenêtre de Contexte",
+  "repeat_last_n": "Répéter Dernier N",
+  "repeat_penalty": "Pénalité de Répétition",
+  "temperature": "Température",
+  "seed": "Graine",
+  "stop": "Arrêt",
+  "tfs_z": "TFS Z",
+  "num_predict": "Nombre Maximum de Jetons",
+  "top_k": "Top K",
+  "top_p": "Top P",
+  "min_p": "Min P"
+}
--- a/api/src/extensions/plugins/ollama/i18n/fr/title.json
+++ b/api/src/extensions/plugins/ollama/i18n/fr/title.json
@ -0,0 +1,3 @@
+{
+  "ollama": "Ollama"
+}
--- a/api/src/extensions/plugins/ollama/index.plugin.ts
+++ b/api/src/extensions/plugins/ollama/index.plugin.ts
@ -0,0 +1,112 @@
+import { Injectable } from '@nestjs/common';
+
+import { Block } from '@/chat/schemas/block.schema';
+import { Context } from '@/chat/schemas/types/context';
+import {
+  OutgoingMessageFormat,
+  StdOutgoingTextEnvelope,
+} from '@/chat/schemas/types/message';
+import { MessageService } from '@/chat/services/message.service';
+import { ContentService } from '@/cms/services/content.service';
+import OllamaLlmHelper from '@/extensions/helpers/ollama/index.helper';
+import { HelperService } from '@/helper/helper.service';
+import { HelperType } from '@/helper/types';
+import { LoggerService } from '@/logger/logger.service';
+import { BaseBlockPlugin } from '@/plugins/base-block-plugin';
+import { PluginService } from '@/plugins/plugins.service';
+
+import { OLLAMA_PLUGIN_SETTINGS } from './settings';
+
+/**
+ * Block plugin that replies to the incoming user message with an Ollama chat
+ * completion.
+ *
+ * The system prompt is assembled from the block's `context` and
+ * `instructions` arguments plus the documents returned by a content text
+ * search on the user's message. If anything fails, the configured
+ * `fallback_message` is returned instead.
+ */
+@Injectable()
+export class OllamaPlugin extends BaseBlockPlugin<
+  typeof OLLAMA_PLUGIN_SETTINGS
+> {
+  /** Setting definitions; also used to pick out the Ollama model options. */
+  public readonly settings = OLLAMA_PLUGIN_SETTINGS;
+
+  /**
+   * @param pluginService - Passed to the base block plugin constructor.
+   * @param helperService - Used to resolve the Ollama LLM helper.
+   * @param logger - Receives the debug prompt and any processing error.
+   * @param contentService - Used to search content for prompt documents.
+   * @param messageService - Used to load the user's latest messages.
+   */
+  constructor(
+    pluginService: PluginService,
+    private helperService: HelperService,
+    private logger: LoggerService,
+    private contentService: ContentService,
+    private messageService: MessageService,
+  ) {
+    super('ollama', OLLAMA_PLUGIN_SETTINGS, pluginService);
+
+    this.template = { name: 'Ollama Plugin' };
+    this.effects = {
+      onStoreContextData: () => {},
+    };
+  }
+
+  /**
+   * Generates the text reply for a block that uses this plugin.
+   *
+   * @param block - Block whose arguments configure the model and prompt.
+   * @param context - Conversation context; `context.text` is the user message
+   *   and `context.user` identifies whose history is loaded.
+   * @param _convId - Conversation id (unused).
+   * @returns A text envelope containing the model's answer, or the block's
+   *   `fallback_message` when any step throws.
+   */
+  async process(block: Block, context: Context, _convId: string) {
+    const args = this.getArguments(block);
+
+    try {
+      const ragContent = await this.contentService.textSearch(context.text);
+
+      const systemPrompt = [
+        `CONTEXT: ${args.context}`,
+        `DOCUMENTS:`,
+        ...ragContent.map(
+          (curr, index) =>
+            `\tDOCUMENT ${index + 1} \n\t\tTitle: ${curr.title} \n\t\tData: ${curr.rag}`,
+        ),
+        `INSTRUCTIONS:`,
+        args.instructions,
+      ].join('\n');
+
+      this.logger.debug('Ollama: Prompt', systemPrompt);
+
+      const ollamaHelper = this.helperService.use(
+        HelperType.LLM,
+        OllamaLlmHelper,
+      );
+
+      const history = await this.messageService.findLastMessages(
+        context.user,
+        args.max_messages_ctx,
+      );
+
+      // Collect the model options from the settings declared with
+      // `group: 'options'`. The settings carry no `subgroup` key, so filtering
+      // on `subgroup` always yielded an empty options object.
+      const options = this.settings
+        .filter((setting) => 'group' in setting && setting.group === 'options')
+        .reduce((acc, { label }) => {
+          acc[label] = args[label];
+          return acc;
+        }, {});
+
+      // Call Ollama API
+      const result = await ollamaHelper.generateChatCompletion(
+        context.text,
+        args.model,
+        systemPrompt,
+        history,
+        {
+          keepAlive: args.keep_alive,
+          options,
+        },
+      );
+
+      const envelope: StdOutgoingTextEnvelope = {
+        format: OutgoingMessageFormat.text,
+        message: {
+          text: result,
+        },
+      };
+
+      return envelope;
+    } catch (err) {
+      // Log the underlying error so failures can be diagnosed.
+      this.logger.error('Ollama Plugin: Something went wrong ...', err);
+      // Send fallback message
+      const envelope: StdOutgoingTextEnvelope = {
+        format: OutgoingMessageFormat.text,
+        message: {
+          text: args.fallback_message,
+        },
+      };
+
+      return envelope;
+    }
+  }
+}
--- a/api/src/extensions/plugins/ollama/package.json
+++ b/api/src/extensions/plugins/ollama/package.json
@ -0,0 +1,11 @@
+{
+  "name": "hexabot-ollama",
+  "version": "2.0.0",
+  "description": "The Ollama Plugin Extension for Hexabot Chatbot / Agent Builder that provides a custom block for Generative AI + RAG",
+  "dependencies": {},
+  "extensions": {
+    "hexabot-helper-ollama": "2.0.0"
+  },
+  "author": "Hexastack",
+  "license": "AGPL-3.0-only"
+}
--- a/api/src/extensions/plugins/ollama/settings.ts
+++ b/api/src/extensions/plugins/ollama/settings.ts
@ -0,0 +1,125 @@
+import { PluginSetting } from '@/plugins/types';
+import { SettingType } from '@/setting/schemas/types';
+
+/**
+ * Settings exposed by the Ollama block plugin.
+ *
+ * Entries in the `default` group hold the model name, keep-alive duration,
+ * history size, prompt texts and fallback message. Entries in the `options`
+ * group hold the generation parameters (mirostat, temperature, top_k, ...)
+ * described in the plugin's i18n help files.
+ *
+ * Declared `as const satisfies PluginSetting[]` so the literal labels and
+ * value types are preserved for `typeof OLLAMA_PLUGIN_SETTINGS`.
+ */
+export const OLLAMA_PLUGIN_SETTINGS = [
+  {
+    label: 'model',
+    group: 'default',
+    type: SettingType.text,
+    value: 'llama3.2', // Default model
+  },
+  {
+    label: 'keep_alive',
+    group: 'default',
+    type: SettingType.text,
+    value: '5m', // Default value for keeping the model in memory
+  },
+  {
+    label: 'max_messages_ctx',
+    group: 'default',
+    type: SettingType.number,
+    value: 5, // Default number of messages to retrieve for context
+  },
+  {
+    label: 'context',
+    group: 'default',
+    type: SettingType.text,
+    value: `You are an AI Assistant that works for Hexastack, the IT company behind Hexabot the chatbot builder.`, // Default context given to the assistant
+  },
+  {
+    label: 'instructions',
+    group: 'default',
+    type: SettingType.textarea,
+    value: `Answer the user QUESTION using the DOCUMENTS text above. Keep your answer ground in the facts of the DOCUMENT. If the DOCUMENT doesn’t contain the facts to answer the QUESTION, apologize and try to give an answer that promotes the company and its values. DO NOT SAY ANYTHING ABOUT THESE DOCUMENTS, nor their EXISTENCE.`,
+  },
+  {
+    label: 'fallback_message',
+    group: 'default',
+    type: SettingType.textarea,
+    value: `Something went wrong ... please try again later.`,
+  },
+  {
+    label: 'mirostat',
+    group: 'options',
+    type: SettingType.number,
+    value: 0, // Default: disabled
+  },
+  {
+    label: 'mirostat_eta',
+    group: 'options',
+    type: SettingType.number,
+    value: 0.1, // Default value
+  },
+  {
+    label: 'mirostat_tau',
+    group: 'options',
+    type: SettingType.number,
+    value: 5.0, // Default value
+  },
+  {
+    label: 'num_ctx',
+    group: 'options',
+    type: SettingType.number,
+    value: 2048, // Default value
+  },
+  {
+    label: 'repeat_last_n',
+    group: 'options',
+    type: SettingType.number,
+    value: 64, // Default value
+  },
+  {
+    label: 'repeat_penalty',
+    group: 'options',
+    type: SettingType.number,
+    value: 1.1, // Default value
+  },
+  {
+    label: 'temperature',
+    group: 'options',
+    type: SettingType.number,
+    value: 0.8, // Default value
+  },
+  {
+    label: 'seed',
+    group: 'options',
+    type: SettingType.number,
+    value: 0, // Default value
+  },
+  {
+    label: 'stop',
+    group: 'options',
+    type: SettingType.text,
+    value: 'AI assistant:', // Default stop sequence
+  },
+  {
+    label: 'tfs_z',
+    group: 'options',
+    type: SettingType.number,
+    value: 1, // Default value, 1.0 means disabled
+  },
+  {
+    label: 'num_predict',
+    group: 'options',
+    type: SettingType.number,
+    value: 20, // Plugin default. NOTE(review): the help text cites 128 as the default — confirm 20 is intended
+  },
+  {
+    label: 'top_k',
+    group: 'options',
+    type: SettingType.number,
+    value: 40, // Default value
+  },
+  {
+    label: 'top_p',
+    group: 'options',
+    type: SettingType.number,
+    value: 0.9, // Default value
+  },
+  {
+    label: 'min_p',
+    group: 'options',
+    type: SettingType.number,
+    value: 0.0, // Default value
+  },
+] as const satisfies PluginSetting[];