feat: add ollama helper & plugin as npm packages (installed by default)

Mohamed Marrouchi 2024-10-23 16:33:21 +01:00
parent c9b7567fd5
commit e2e61ef7c9
21 changed files with 21 additions and 762 deletions

api/package-lock.json generated
View File

@@ -33,6 +33,8 @@
"dotenv": "^16.3.1",
"ejs": "^3.1.9",
"express-session": "^1.17.3",
"hexabot-helper-ollama": "^2.0.0",
"hexabot-plugin-ollama": "^2.0.0",
"joi": "^17.11.0",
"module-alias": "^2.2.3",
"mongoose": "^8.0.0",
@@ -11355,6 +11357,22 @@
"he": "bin/he"
}
},
"node_modules/hexabot-helper-ollama": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/hexabot-helper-ollama/-/hexabot-helper-ollama-2.0.0.tgz",
"integrity": "sha512-VYY+zlOBmCrkhBML/ZUBA96VXXpsHcrGiAVk2s1GsyaOUVOjQtovQE9rHlU6qf38biGd72LUQ827323pZim6GA==",
"dependencies": {
"ollama": "^0.5.9"
}
},
"node_modules/hexabot-plugin-ollama": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/hexabot-plugin-ollama/-/hexabot-plugin-ollama-2.0.0.tgz",
"integrity": "sha512-/YE2o2UYNIG1vymViTcOrVlpFFmMswZVEI9EkDC9Xs9RzzHtIRD473RQE1PhTDFLHBAG3Qx3NgbiaJR/el4GXQ==",
"dependencies": {
"hexabot-helper-ollama": "2.0.0"
}
},
"node_modules/hexoid": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/hexoid/-/hexoid-1.0.0.tgz",

View File

@@ -59,6 +59,8 @@
"dotenv": "^16.3.1",
"ejs": "^3.1.9",
"express-session": "^1.17.3",
"hexabot-helper-ollama": "^2.0.0",
"hexabot-plugin-ollama": "^2.0.0",
"joi": "^17.11.0",
"module-alias": "^2.2.3",
"mongoose": "^8.0.0",
@@ -165,4 +167,4 @@
"@/(.*)": "<rootDir>/$1"
}
}
}
}

View File

@@ -1,23 +0,0 @@
{
"api_url": "URL of the Ollama server.",
"model": "Determines which model to run. You need to ensure to pull the model in Ollama to be able to use it.",
"keep_alive": "Time to keep the model in memory.",
"max_messages_ctx": "Number of messages to include in the context.",
"context": "Provide context to the assistant (e.g., You are an AI assistant).",
"instructions": "Instructions to give to the assistant.",
"fallback_message": "Message to return in case there is an API error.",
"mirostat": "Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)",
"mirostat_eta": "Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1)",
"mirostat_tau": "Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0)",
"num_ctx": "Sets the size of the context window used to generate the next token. (Default: 2048)",
"repeat_last_n": "Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)",
"repeat_penalty": "Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)",
"temperature": "The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)",
"seed": "Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0)",
"stop": "Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate `stop` parameters in a modelfile.",
"tfs_z": "Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1)",
"num_predict": "Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)",
"top_k": "Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)",
"top_p": "Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)",
"min_p": "Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0)"
}
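The descriptions above cover the sampling options that both the helper and the plugin forward, unchanged, to the Ollama client. As a rough sketch of how they end up in an API call (assuming the `ollama` npm client pinned above at ^0.5.9, a reachable server at http://ollama:11434, and a pulled llama3.2 model; the prompt strings are illustrative):

import { Ollama } from 'ollama';

async function generateWithOptions(): Promise<string> {
  const client = new Ollama({ host: 'http://ollama:11434' });
  const response = await client.generate({
    model: 'llama3.2',
    prompt: 'Summarize what Hexabot does in one sentence.',
    system: 'You are an AI assistant.',
    keep_alive: '5m', // how long the model stays loaded in memory
    options: {
      temperature: 0.8, // higher values give more creative answers
      top_k: 40, // restrict sampling to the 40 most likely tokens
      top_p: 0.9, // nucleus sampling threshold, works together with top_k
      repeat_penalty: 1.1, // penalize repeated tokens
      num_predict: 128, // maximum number of tokens to generate
      seed: 0, // fixed seed for reproducible output
    },
  });
  return response.response;
}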

View File

@@ -1,23 +0,0 @@
{
"api_url": "API URL",
"model": "Model",
"keep_alive": "Keep Alive",
"max_messages_ctx": "Max Context Messages",
"context": "Context",
"instructions": "Instructions",
"fallback_message": "Fallback Message",
"mirostat": "Mirostat",
"mirostat_eta": "Mirostat Eta",
"mirostat_tau": "Mirostat Tau",
"num_ctx": "Context Window Size",
"repeat_last_n": "Repeat Last N",
"repeat_penalty": "Repeat Penalty",
"temperature": "Temperature",
"seed": "Seed",
"stop": "Stop",
"tfs_z": "TFS Z",
"num_predict": "Maximum number of tokens",
"top_k": "Top K",
"top_p": "Top P",
"min_p": "Min P"
}

View File

@@ -1,3 +0,0 @@
{
"ollama_helper": "Ollama"
}

View File

@@ -1,23 +0,0 @@
{
"api_url": "Adresse URL du serveur Ollama.",
"model": "Détermine le modèle à utiliser. Assurez-vous de charger le modèle sur Ollama pour pouvoir l'utiliser.",
"keep_alive": "Temps pendant lequel le modèle reste en mémoire.",
"max_messages_ctx": "Nombre maximum de messages à inclure dans le contexte.",
"context": "Fournit un contexte à l'assistant (par exemple : Vous êtes un assistant IA).",
"instructions": "Instructions à donner à l'assistant.",
"fallback_message": "Message à retourner en cas d'erreur API.",
"mirostat": "Active le prélèvement de Mirostat pour contrôler la perplexité. (par défaut : 0, 0 = désactivé, 1 = Mirostat, 2 = Mirostat 2.0)",
"mirostat_eta": "Influence la rapidité de réaction de l'algorithme aux retours du texte généré. Un taux d'apprentissage plus bas entraînera des ajustements plus lents, tandis qu'un taux plus élevé rendra l'algorithme plus réactif. (Par défaut : 0.1)",
"mirostat_tau": "Contrôle l'équilibre entre la cohérence et la diversité de la sortie. Une valeur plus basse résulte en un texte plus concentré et cohérent. (Par défaut : 5.0)",
"num_ctx": "Définit la taille de la fenêtre de contexte utilisée pour générer le prochain jeton. (Par défaut : 2048)",
"repeat_last_n": "Définit jusqu'où le modèle doit regarder en arrière pour éviter la répétition. (Par défaut : 64, 0 = désactivé, -1 = num_ctx)",
"repeat_penalty": "Définit la force de la pénalité pour les répétitions. Une valeur plus élevée (par exemple, 1.5) pénalisera plus fortement les répétitions, tandis qu'une valeur plus basse (par exemple, 0.9) sera plus clémente. (Par défaut : 1.1)",
"temperature": "La température du modèle. Augmenter la température rendra le modèle plus créatif. (Par défaut : 0.8)",
"seed": "Définit la graine de nombre aléatoire à utiliser pour la génération. Fixer ce numéro permettra au modèle de générer le même texte pour la même invite. (Par défaut : 0)",
"stop": "Définit les séquences d'arrêt à utiliser. Lorsque ce motif est rencontré, le modèle cessera de générer du texte et retournera. Plusieurs motifs d'arrêt peuvent être définis en spécifiant plusieurs paramètres `stop` séparés dans un fichier de modèle.",
"tfs_z": "L'échantillonnage sans queue est utilisé pour réduire l'impact des jetons moins probables dans la sortie. Une valeur plus élevée (par exemple, 2.0) réduira davantage l'impact, tandis qu'une valeur de 1.0 désactive ce paramètre. (par défaut : 1)",
"num_predict": "Nombre maximum de jetons à prédire lors de la génération de texte. (Par défaut : 128, -1 = génération infinie, -2 = remplir le contexte)",
"top_k": "Réduit la probabilité de générer des non-sens. Une valeur plus élevée (par exemple, 100) donnera des réponses plus diverses, tandis qu'une valeur plus basse (par exemple, 10) sera plus conservatrice. (Par défaut : 40)",
"top_p": "Fonctionne conjointement avec top-k. Une valeur plus élevée (par exemple, 0.95) conduira à un texte plus diversifié, tandis qu'une valeur plus basse (par exemple, 0.5) générera un texte plus concentré et conservateur. (Par défaut : 0.9)",
"min_p": "Alternative au top_p, et vise à assurer un équilibre entre la qualité et la variété. Le paramètre *p* représente la probabilité minimum pour qu'un jeton soit considéré, par rapport à la probabilité du jeton le plus probable. Par exemple, avec *p* = 0.05 et le jeton le plus probable ayant une probabilité de 0.9, les logits d'une valeur inférieure à 0.045 sont filtrés. (Par défaut : 0.0)"
}

View File

@@ -1,23 +0,0 @@
{
"api_url": "URL de l'API",
"model": "Modèle",
"keep_alive": "Maintien en Vie",
"max_messages_ctx": "Nombre Maximum de Messages",
"context": "Contexte",
"instructions": "Instructions",
"fallback_message": "Message de Secours",
"mirostat": "Mirostat",
"mirostat_eta": "Mirostat Eta",
"mirostat_tau": "Mirostat Tau",
"num_ctx": "Num Ctx",
"repeat_last_n": "Répéter Dernier N",
"repeat_penalty": "Pénalité de Répétition",
"temperature": "Température",
"seed": "Graine",
"stop": "Arrêt",
"tfs_z": "TFS Z",
"num_predict": "Nombre de Tokens",
"top_k": "Top K",
"top_p": "Top P",
"min_p": "Min P"
}

View File

@@ -1,3 +0,0 @@
{
"ollama_helper": "Ollama"
}

View File

@@ -1,22 +0,0 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import OLLAMA_HELPER_SETTINGS, { OLLAMA_HELPER_NAMESPACE } from './settings';
declare global {
interface Settings extends SettingTree<typeof OLLAMA_HELPER_SETTINGS> {}
}
declare module '@nestjs/event-emitter' {
interface IHookExtensionsOperationMap {
[OLLAMA_HELPER_NAMESPACE]: TDefinition<
object,
SettingMapByType<typeof OLLAMA_HELPER_SETTINGS>
>;
}
}

View File

@@ -1,134 +0,0 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { Injectable, OnApplicationBootstrap } from '@nestjs/common';
import { OnEvent } from '@nestjs/event-emitter';
import { Ollama } from 'ollama';
import { AnyMessage } from '@/chat/schemas/types/message';
import { HelperService } from '@/helper/helper.service';
import BaseLlmHelper from '@/helper/lib/base-llm-helper';
import { LoggerService } from '@/logger/logger.service';
import { Setting } from '@/setting/schemas/setting.schema';
import { SettingService } from '@/setting/services/setting.service';
import { OLLAMA_HELPER_NAME } from './settings';
@Injectable()
export default class OllamaLlmHelper
extends BaseLlmHelper<typeof OLLAMA_HELPER_NAME>
implements OnApplicationBootstrap
{
private client: Ollama;
/**
* Instantiate the LLM helper
*
* @param settingService - Setting service
* @param helperService - Helper service
* @param logger - Logger service
*/
constructor(
settingService: SettingService,
helperService: HelperService,
protected readonly logger: LoggerService,
) {
super('ollama-helper', settingService, helperService, logger);
}
getPath(): string {
return __dirname;
}
async onApplicationBootstrap() {
const settings = await this.getSettings();
this.client = new Ollama({ host: settings.api_url });
}
@OnEvent('hook:ollama_helper:api_url')
handleApiUrlChange(setting: Setting) {
this.client = new Ollama({ host: setting.value });
}
/**
* Generates a response using the LLM
*
* @param prompt - The input text from the user
* @param model - The model to be used
* @param system - The system prompt providing instructions/context
* @returns {Promise<string>} - The generated response from the LLM
*/
async generateResponse(
prompt: string,
model: string,
system: string,
{ keepAlive = '5m', options = {} },
): Promise<string> {
const response = await this.client.generate({
model,
prompt,
system,
keep_alive: keepAlive,
options,
});
return response.response ? response.response : '';
}
/**
* Formats messages to the Ollama required data structure
*
* @param messages - Message history to include
*
* @returns Ollama message array
*/
private formatMessages(messages: AnyMessage[]) {
return messages.map((m) => {
return {
role: 'sender' in m && m.sender ? 'user' : 'assistant',
content: 'text' in m.message && m.message.text ? m.message.text : '',
};
});
}
/**
* Send a chat completion request with the conversation history.
* You can use this same approach to start the conversation
* using multi-shot or chain-of-thought prompting.
*
* @param prompt - The input text from the user
* @param model - The model to be used
* @param systemPrompt - The system prompt providing instructions/context
* @param history - Array of previous messages to include as context
* @returns {Promise<string>} - The generated response from the LLM
*/
public async generateChatCompletion(
prompt: string,
model: string,
systemPrompt: string,
history: AnyMessage[] = [],
{ keepAlive = '5m', options = {} },
) {
const response = await this.client.chat({
model,
messages: [
{
role: 'system',
content: systemPrompt,
},
...this.formatMessages(history),
{
role: 'user',
content: prompt,
},
],
keep_alive: keepAlive,
options,
});
return response.message.content ? response.message.content : '';
}
}
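The helper is not meant to be instantiated directly: it registers itself with the HelperService, and consumers resolve it as the active LLM helper, exactly as the Ollama plugin further down in this commit does. A minimal usage sketch (the import path and the surrounding function are illustrative; real callers receive HelperService through Nest dependency injection):

import { HelperService } from '@/helper/helper.service';
import { HelperType } from '@/helper/types';
// Illustrative import path; the plugin below imports it from
// '@/extensions/helpers/ollama/index.helper' prior to the extraction to npm.
import OllamaLlmHelper from './index.helper';

async function askOllama(helperService: HelperService, question: string) {
  // Resolve the registered LLM helper (OllamaLlmHelper).
  const llm = helperService.use(HelperType.LLM, OllamaLlmHelper);

  return await llm.generateChatCompletion(
    question, // user prompt
    'llama3.2', // model, must already be pulled in Ollama
    'You are an AI assistant.', // system prompt
    [], // no prior message history
    { keepAlive: '5m', options: { temperature: 0.8 } },
  );
}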

View File

@@ -1,10 +0,0 @@
{
"name": "hexabot-helper-ollama",
"version": "2.0.0",
"description": "The Ollama Helper Extension for Hexabot Chatbot / Agent Builder to enable the LLM Capability",
"dependencies": {
"ollama": "^0.5.9"
},
"author": "Hexastack",
"license": "AGPL-3.0-only"
}

View File

@@ -1,135 +0,0 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { HelperSetting } from '@/helper/types';
import { SettingType } from '@/setting/schemas/types';
export const OLLAMA_HELPER_NAME = 'ollama-helper';
export const OLLAMA_HELPER_NAMESPACE: HyphenToUnderscore<
typeof OLLAMA_HELPER_NAME
> = 'ollama_helper';
export default [
{
label: 'api_url',
group: OLLAMA_HELPER_NAMESPACE,
type: SettingType.text,
value: 'http://ollama:11434', // Default value
},
{
label: 'model',
group: OLLAMA_HELPER_NAMESPACE,
type: SettingType.text,
value: 'llama3.2', // Default model
},
{
label: 'keep_alive',
group: OLLAMA_HELPER_NAMESPACE,
type: SettingType.text,
value: '5m', // Default value for keeping the model in memory
},
{
label: 'mirostat',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 0, // Default: disabled
},
{
label: 'mirostat_eta',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 0.1, // Default value
},
{
label: 'mirostat_tau',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 5.0, // Default value
},
{
label: 'num_ctx',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 2048, // Default value
},
{
label: 'repeat_last_n',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 64, // Default value
},
{
label: 'repeat_penalty',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 1.1, // Default value
},
{
label: 'temperature',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 0.8, // Default value
},
{
label: 'seed',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 0, // Default value
},
{
label: 'stop',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.text,
value: 'AI assistant:', // Default stop sequence
},
{
label: 'tfs_z',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 1, // Default value, 1.0 means disabled
},
{
label: 'num_predict',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 20, // Default value
},
{
label: 'top_k',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 40, // Default value
},
{
label: 'top_p',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 0.9, // Default value
},
{
label: 'min_p',
group: OLLAMA_HELPER_NAMESPACE,
subgroup: 'options',
type: SettingType.number,
value: 0.0, // Default value
},
] as const satisfies HelperSetting<typeof OLLAMA_HELPER_NAME>[];
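Each entry above becomes a Setting document in the ollama_helper group, and the helper's getSettings() call in onApplicationBootstrap resolves them back into a typed object via the SettingTree/SettingMapByType declarations in global.d.ts. An approximate sketch of the resolved shape, derived from the labels and SettingType values listed here (not generated output; entries under the 'options' subgroup may be nested depending on SettingMapByType, shown flat here for brevity):

// Approximate shape of `await this.getSettings()` for this helper:
// SettingType.text maps to string, SettingType.number maps to number.
type OllamaHelperSettings = {
  api_url: string; // e.g. 'http://ollama:11434'
  model: string; // e.g. 'llama3.2'
  keep_alive: string; // e.g. '5m'
  mirostat: number;
  mirostat_eta: number;
  mirostat_tau: number;
  num_ctx: number;
  repeat_last_n: number;
  repeat_penalty: number;
  temperature: number;
  seed: number;
  stop: string;
  tfs_z: number;
  num_predict: number;
  top_k: number;
  top_p: number;
  min_p: number;
};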

View File

@@ -1,23 +0,0 @@
{
"api_url": "URL of the Ollama server.",
"model": "Determines which model to run. You need to ensure to pull the model in Ollama to be able to use it.",
"keep_alive": "Time to keep the model in memory.",
"max_messages_ctx": "Number of messages to include in the context.",
"context": "Provide context to the assistant (e.g., You are an AI assistant).",
"instructions": "Instructions to give to the assistant.",
"fallback_message": "Message to return in case there is an API error.",
"mirostat": "Enable Mirostat sampling for controlling perplexity. (default: 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)",
"mirostat_eta": "Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. (Default: 0.1)",
"mirostat_tau": "Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0)",
"num_ctx": "Sets the size of the context window used to generate the next token. (Default: 2048)",
"repeat_last_n": "Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx)",
"repeat_penalty": "Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1)",
"temperature": "The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)",
"seed": "Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0)",
"stop": "Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate `stop` parameters in a modelfile.",
"tfs_z": "Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1)",
"num_predict": "Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context)",
"top_k": "Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)",
"top_p": "Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)",
"min_p": "Alternative to the top_p, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. For example, with *p*=0.05 and the most likely token having a probability of 0.9, logits with a value less than 0.045 are filtered out. (Default: 0.0)"
}

View File

@@ -1,23 +0,0 @@
{
"api_url": "API URL",
"model": "Model",
"keep_alive": "Keep Alive",
"max_messages_ctx": "Max Context Messages",
"context": "Context",
"instructions": "Instructions",
"fallback_message": "Fallback Message",
"mirostat": "Mirostat",
"mirostat_eta": "Mirostat Eta",
"mirostat_tau": "Mirostat Tau",
"num_ctx": "Context Window Size",
"repeat_last_n": "Repeat Last N",
"repeat_penalty": "Repeat Penalty",
"temperature": "Temperature",
"seed": "Seed",
"stop": "Stop",
"tfs_z": "TFS Z",
"num_predict": "Maximum number of tokens",
"top_k": "Top K",
"top_p": "Top P",
"min_p": "Min P"
}

View File

@@ -1,3 +0,0 @@
{
"ollama_plugin": "Ollama Plugin"
}

View File

@@ -1,23 +0,0 @@
{
"api_url": "Adresse URL du serveur Ollama.",
"model": "Détermine le modèle à utiliser. Assurez-vous de charger le modèle sur Ollama pour pouvoir l'utiliser.",
"keep_alive": "Temps pendant lequel le modèle reste en mémoire.",
"max_messages_ctx": "Nombre maximum de messages à inclure dans le contexte.",
"context": "Fournit un contexte à l'assistant (par exemple : Vous êtes un assistant IA).",
"instructions": "Instructions à donner à l'assistant.",
"fallback_message": "Message à retourner en cas d'erreur API.",
"mirostat": "Active le prélèvement de Mirostat pour contrôler la perplexité. (par défaut : 0, 0 = désactivé, 1 = Mirostat, 2 = Mirostat 2.0)",
"mirostat_eta": "Influence la rapidité de réaction de l'algorithme aux retours du texte généré. Un taux d'apprentissage plus bas entraînera des ajustements plus lents, tandis qu'un taux plus élevé rendra l'algorithme plus réactif. (Par défaut : 0.1)",
"mirostat_tau": "Contrôle l'équilibre entre la cohérence et la diversité de la sortie. Une valeur plus basse résulte en un texte plus concentré et cohérent. (Par défaut : 5.0)",
"num_ctx": "Définit la taille de la fenêtre de contexte utilisée pour générer le prochain jeton. (Par défaut : 2048)",
"repeat_last_n": "Définit jusqu'où le modèle doit regarder en arrière pour éviter la répétition. (Par défaut : 64, 0 = désactivé, -1 = num_ctx)",
"repeat_penalty": "Définit la force de la pénalité pour les répétitions. Une valeur plus élevée (par exemple, 1.5) pénalisera plus fortement les répétitions, tandis qu'une valeur plus basse (par exemple, 0.9) sera plus clémente. (Par défaut : 1.1)",
"temperature": "La température du modèle. Augmenter la température rendra le modèle plus créatif. (Par défaut : 0.8)",
"seed": "Définit la graine de nombre aléatoire à utiliser pour la génération. Fixer ce numéro permettra au modèle de générer le même texte pour la même invite. (Par défaut : 0)",
"stop": "Définit les séquences d'arrêt à utiliser. Lorsque ce motif est rencontré, le modèle cessera de générer du texte et retournera. Plusieurs motifs d'arrêt peuvent être définis en spécifiant plusieurs paramètres `stop` séparés dans un fichier de modèle.",
"tfs_z": "L'échantillonnage sans queue est utilisé pour réduire l'impact des jetons moins probables dans la sortie. Une valeur plus élevée (par exemple, 2.0) réduira davantage l'impact, tandis qu'une valeur de 1.0 désactive ce paramètre. (par défaut : 1)",
"num_predict": "Nombre maximum de jetons à prédire lors de la génération de texte. (Par défaut : 128, -1 = génération infinie, -2 = remplir le contexte)",
"top_k": "Réduit la probabilité de générer des non-sens. Une valeur plus élevée (par exemple, 100) donnera des réponses plus diverses, tandis qu'une valeur plus basse (par exemple, 10) sera plus conservatrice. (Par défaut : 40)",
"top_p": "Fonctionne conjointement avec top-k. Une valeur plus élevée (par exemple, 0.95) conduira à un texte plus diversifié, tandis qu'une valeur plus basse (par exemple, 0.5) générera un texte plus concentré et conservateur. (Par défaut : 0.9)",
"min_p": "Alternative au top_p, et vise à assurer un équilibre entre la qualité et la variété. Le paramètre *p* représente la probabilité minimum pour qu'un jeton soit considéré, par rapport à la probabilité du jeton le plus probable. Par exemple, avec *p* = 0.05 et le jeton le plus probable ayant une probabilité de 0.9, les logits d'une valeur inférieure à 0.045 sont filtrés. (Par défaut : 0.0)"
}

View File

@@ -1,23 +0,0 @@
{
"api_url": "URL de l'API",
"model": "Modèle",
"keep_alive": "Maintien en Vie",
"max_messages_ctx": "Nombre Maximum de Messages",
"context": "Contexte",
"instructions": "Instructions",
"fallback_message": "Message de Secours",
"mirostat": "Mirostat",
"mirostat_eta": "Mirostat Eta",
"mirostat_tau": "Mirostat Tau",
"num_ctx": "Num Ctx",
"repeat_last_n": "Répéter Dernier N",
"repeat_penalty": "Pénalité de Répétition",
"temperature": "Température",
"seed": "Graine",
"stop": "Arrêt",
"tfs_z": "TFS Z",
"num_predict": "Nombre de Tokens",
"top_k": "Top K",
"top_p": "Top P",
"min_p": "Min P"
}

View File

@@ -1,3 +0,0 @@
{
"ollama_plugin": "Ollama Plugin"
}

View File

@@ -1,121 +0,0 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { Injectable } from '@nestjs/common';
import { Block } from '@/chat/schemas/block.schema';
import { Context } from '@/chat/schemas/types/context';
import {
OutgoingMessageFormat,
StdOutgoingTextEnvelope,
} from '@/chat/schemas/types/message';
import { MessageService } from '@/chat/services/message.service';
import { ContentService } from '@/cms/services/content.service';
import OllamaLlmHelper from '@/extensions/helpers/ollama/index.helper';
import { HelperService } from '@/helper/helper.service';
import { HelperType } from '@/helper/types';
import { LoggerService } from '@/logger/logger.service';
import { BaseBlockPlugin } from '@/plugins/base-block-plugin';
import { PluginService } from '@/plugins/plugins.service';
import { PluginBlockTemplate } from '@/plugins/types';
import SETTINGS from './settings';
@Injectable()
export class OllamaPlugin extends BaseBlockPlugin<typeof SETTINGS> {
template: PluginBlockTemplate = { name: 'Ollama Plugin' };
constructor(
pluginService: PluginService,
private helperService: HelperService,
private logger: LoggerService,
private contentService: ContentService,
private messageService: MessageService,
) {
super('ollama-plugin', pluginService);
}
getPath(): string {
return __dirname;
}
async process(block: Block, context: Context, _convId: string) {
const args = this.getArguments(block);
try {
const ragContent = await this.contentService.textSearch(context.text);
const systemPrompt = [
`CONTEXT: ${args.context}`,
`DOCUMENTS:`,
...ragContent.map(
(curr, index) =>
`\tDOCUMENT ${index + 1} \n\t\tTitle: ${curr.title} \n\t\tData: ${curr.rag}`,
),
`INSTRUCTIONS:`,
args.instructions,
].join('\n');
this.logger.debug('Ollama: Prompt', systemPrompt);
const ollamaHelper = this.helperService.use(
HelperType.LLM,
OllamaLlmHelper,
);
const history = await this.messageService.findLastMessages(
context.user,
args.max_messages_ctx,
);
const options = this.settings
.filter(
(setting) =>
'subgroup' in setting &&
setting.subgroup === 'options' &&
setting.value !== null,
)
.reduce((acc, { label }) => {
acc[label] = args[label];
return acc;
}, {});
// Call Ollama API
const result = await ollamaHelper.generateChatCompletion(
context.text,
args.model,
systemPrompt,
history,
{
keepAlive: args.keep_alive,
options,
},
);
const envelope: StdOutgoingTextEnvelope = {
format: OutgoingMessageFormat.text,
message: {
text: result,
},
};
return envelope;
} catch (err) {
this.logger.error('Ollama Plugin: Something went wrong ...', err);
// Send fallback message
const envelope: StdOutgoingTextEnvelope = {
format: OutgoingMessageFormat.text,
message: {
text: args.fallback_message,
},
};
return envelope;
}
}
}
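For reference, with the default context and instructions from the plugin's settings.ts below and two hits returned by contentService.textSearch, the systemPrompt assembled in process() would look roughly like this (document titles and data are made up for illustration):

CONTEXT: You are an AI Assistant that works for Hexastack, the IT company behind Hexabot the chatbot builder.
DOCUMENTS:
	DOCUMENT 1
		Title: Getting started
		Data: Hexabot is an open-source chatbot / agent builder ...
	DOCUMENT 2
		Title: Extensions
		Data: Helpers and plugins can be installed as npm packages ...
INSTRUCTIONS:
Answer the user QUESTION using the DOCUMENTS text above. Keep your answer grounded in the facts of the DOCUMENTS. ...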

View File

@@ -1,10 +0,0 @@
{
"name": "hexabot-plugin-ollama",
"version": "2.0.0",
"description": "The Ollama Plugin Extension for Hexabot Chatbot / Agent Builder that provides a custom block for Generative AI + RAG",
"dependencies": {
"hexabot-helper-ollama": "2.0.0"
},
"author": "Hexastack",
"license": "AGPL-3.0-only"
}

View File

@@ -1,133 +0,0 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { PluginSetting } from '@/plugins/types';
import { SettingType } from '@/setting/schemas/types';
export default [
{
label: 'model',
group: 'default',
type: SettingType.text,
value: 'llama3.2', // Default model
},
{
label: 'keep_alive',
group: 'default',
type: SettingType.text,
value: '5m', // Default value for keeping the model in memory
},
{
label: 'max_messages_ctx',
group: 'default',
type: SettingType.number,
value: 5, // Default number of messages to retrieve for context
},
{
label: 'context',
group: 'default',
type: SettingType.text,
value: `You are an AI Assistant that works for Hexastack, the IT company behind Hexabot the chatbot builder.`, // Default context given to the assistant
},
{
label: 'instructions',
group: 'default',
type: SettingType.textarea,
value: `Answer the user QUESTION using the DOCUMENTS text above. Keep your answer grounded in the facts of the DOCUMENTS. If the DOCUMENTS don't contain the facts to answer the QUESTION, apologize and try to give an answer that promotes the company and its values. DO NOT SAY ANYTHING ABOUT THESE DOCUMENTS, nor their EXISTENCE.`,
},
{
label: 'fallback_message',
group: 'default',
type: SettingType.textarea,
value: `Something went wrong ... please try again later.`,
},
{
label: 'mirostat',
group: 'options',
type: SettingType.number,
value: 0, // Default: disabled
},
{
label: 'mirostat_eta',
group: 'options',
type: SettingType.number,
value: 0.1, // Default value
},
{
label: 'mirostat_tau',
group: 'options',
type: SettingType.number,
value: 5.0, // Default value
},
{
label: 'num_ctx',
group: 'options',
type: SettingType.number,
value: 2048, // Default value
},
{
label: 'repeat_last_n',
group: 'options',
type: SettingType.number,
value: 64, // Default value
},
{
label: 'repeat_penalty',
group: 'options',
type: SettingType.number,
value: 1.1, // Default value
},
{
label: 'temperature',
group: 'options',
type: SettingType.number,
value: 0.8, // Default value
},
{
label: 'seed',
group: 'options',
type: SettingType.number,
value: 0, // Default value
},
{
label: 'stop',
group: 'options',
type: SettingType.text,
value: 'AI assistant:', // Default stop sequence
},
{
label: 'tfs_z',
group: 'options',
type: SettingType.number,
value: 1, // Default value, 1.0 means disabled
},
{
label: 'num_predict',
group: 'options',
type: SettingType.number,
value: 20, // Default value
},
{
label: 'top_k',
group: 'options',
type: SettingType.number,
value: 40, // Default value
},
{
label: 'top_p',
group: 'options',
type: SettingType.number,
value: 0.9, // Default value
},
{
label: 'min_p',
group: 'options',
type: SettingType.number,
value: 0.0, // Default value
},
] as const satisfies PluginSetting[];