Merge pull request #428 from Hexastack/feat/nlu-llm

Feat/nlu llm
This commit is contained in:
Med Marrouchi 2024-12-09 15:33:08 +01:00 committed by GitHub
commit a9522d44e0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 394 additions and 45 deletions

View File

@ -17,24 +17,21 @@ import {
StdIncomingMessage,
} from '@/chat/schemas/types/message';
import { Payload } from '@/chat/schemas/types/quick-reply';
import { Nlp } from '@/helper/types';
import { NLU } from '@/helper/types';
import ChannelHandler from './Handler';
export interface ChannelEvent {}
export default abstract class EventWrapper<
A,
E,
C extends ChannelHandler = ChannelHandler,
> {
// eslint-disable-next-line prettier/prettier
export default abstract class EventWrapper<A, E, C extends ChannelHandler = ChannelHandler> {
_adapter: A = {} as A;
_handler: C;
_profile!: Subscriber;
_nlp!: Nlp.ParseEntities;
_nlp!: NLU.ParseEntities;
/**
* Constructor : Class used to wrap any channel's event in order
@ -137,7 +134,7 @@ export default abstract class EventWrapper<
*
* @returns The parsed NLP entities, or null if not available.
*/
getNLP(): Nlp.ParseEntities | null {
getNLP(): NLU.ParseEntities | null {
return this._nlp;
}
@ -146,7 +143,7 @@ export default abstract class EventWrapper<
*
* @param nlp - NLP parse results
*/
setNLP(nlp: Nlp.ParseEntities) {
setNLP(nlp: NLU.ParseEntities) {
this._nlp = nlp;
}

View File

@ -7,7 +7,7 @@
*/
import { ChannelName } from '@/channel/types';
import { Nlp } from '@/helper/types';
import { NLU } from '@/helper/types';
import { Subscriber } from '../subscriber.schema';
@ -17,7 +17,7 @@ export interface Context {
channel?: ChannelName;
text?: string;
payload?: Payload | string;
nlp?: Nlp.ParseEntities | null;
nlp?: NLU.ParseEntities | null;
vars: { [key: string]: any };
user_location: {
address?: Record<string, string>;

View File

@ -13,7 +13,7 @@ import { AttachmentService } from '@/attachment/services/attachment.service';
import EventWrapper from '@/channel/lib/EventWrapper';
import { ContentService } from '@/cms/services/content.service';
import { CONSOLE_CHANNEL_NAME } from '@/extensions/channels/console/settings';
import { Nlp } from '@/helper/types';
import { NLU } from '@/helper/types';
import { I18nService } from '@/i18n/services/i18n.service';
import { LanguageService } from '@/i18n/services/language.service';
import { LoggerService } from '@/logger/logger.service';
@ -254,7 +254,7 @@ export class BlockService extends BaseService<Block, BlockPopulate, BlockFull> {
* @returns The NLP patterns that matches
*/
matchNLP(
nlp: Nlp.ParseEntities,
nlp: NLU.ParseEntities,
block: Block | BlockFull,
): NlpPattern[] | undefined {
// No nlp entities to check against

View File

@ -6,7 +6,7 @@
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { Nlp } from '@/helper/types';
import { NLU } from '@/helper/types';
import { NlpParseResultType, RasaNlu } from '../types';
@ -100,7 +100,7 @@ export const nlpParseResult: NlpParseResultType = {
text: 'Hello Joe',
};
export const nlpBestGuess: Nlp.ParseEntities = {
export const nlpBestGuess: NLU.ParseEntities = {
entities: [
{
start: 5,

View File

@ -11,7 +11,7 @@ import { Injectable } from '@nestjs/common';
import { HelperService } from '@/helper/helper.service';
import BaseNlpHelper from '@/helper/lib/base-nlp-helper';
import { Nlp } from '@/helper/types';
import { NLU } from '@/helper/types';
import { LanguageService } from '@/i18n/services/language.service';
import { LoggerService } from '@/logger/logger.service';
import { NlpEntity, NlpEntityFull } from '@/nlp/schemas/nlp-entity.schema';
@ -191,10 +191,10 @@ export default class CoreNluHelper extends BaseNlpHelper<
async filterEntitiesByConfidence(
nlp: NlpParseResultType,
threshold: boolean,
): Promise<Nlp.ParseEntities> {
): Promise<NLU.ParseEntities> {
try {
let minConfidence = 0;
const guess: Nlp.ParseEntities = {
const guess: NLU.ParseEntities = {
entities: nlp.entities.slice(),
};
if (threshold) {
@ -255,7 +255,7 @@ export default class CoreNluHelper extends BaseNlpHelper<
text: string,
threshold: boolean,
project: string = 'current',
): Promise<Nlp.ParseEntities> {
): Promise<NLU.ParseEntities> {
try {
const settings = await this.getSettings();
const { data: nlp } =

View File

@ -0,0 +1,5 @@
{
"model": "Specify the name of the LLM (Large Language Model) you want to use. Leave this field empty if you prefer to use the default model specified in the LLM helper's settings.",
"language_classifier_prompt_template": "Provide the prompt template used for language detection. Use Handlebars syntax to dynamically insert variables or customize the prompt based on your requirements.",
"trait_classifier_prompt_template": "Define the prompt template for trait classification tasks, such as intent or sentiment detection. Use Handlebars syntax to structure and format the prompt appropriately."
}

View File

@ -0,0 +1,5 @@
{
"model": "LLM Model",
"language_classifier_prompt_template": "Language Detection Prompt Template",
"trait_classifier_prompt_template": "Trait Classifier Prompt Template"
}

View File

@ -0,0 +1,3 @@
{
"llm_nlu_helper": "LLM NLU Engine"
}

View File

@ -0,0 +1,5 @@
{
"model": "Spécifiez le nom du modèle LLM que vous souhaitez utiliser. Laissez ce champ vide si vous préférez utiliser le modèle par défaut spécifié dans les paramètres de l'assistant LLM.",
"language_classifier_prompt_template": "Fournissez le modèle de prompt utilisé pour la détection de langue. Utilisez la syntaxe Handlebars pour insérer dynamiquement des variables ou personnaliser le prompt en fonction de vos besoins.",
"trait_classifier_prompt_template": "Définissez le modèle de prompt pour les tâches de classification des traits, telles que la détection d'intention ou de sentiment. Utilisez la syntaxe Handlebars pour structurer et formater correctement le prompt."
}

View File

@ -0,0 +1,5 @@
{
"model": "Modèle LLM",
"language_classifier_prompt_template": "Modèle de prompt de détection de langue",
"trait_classifier_prompt_template": "Modèle de prompt du classificateur de traits"
}

View File

@ -0,0 +1,3 @@
{
"llm_nlu_helper": "Moteur LLM NLU"
}

View File

@ -0,0 +1,22 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import LLM_NLU_HELPER_SETTINGS, { LLM_NLU_HELPER_NAMESPACE } from './settings';
declare global {
interface Settings extends SettingTree<typeof LLM_NLU_HELPER_SETTINGS> {}
}
declare module '@nestjs/event-emitter' {
interface IHookExtensionsOperationMap {
[LLM_NLU_HELPER_NAMESPACE]: TDefinition<
object,
SettingMapByType<typeof LLM_NLU_HELPER_SETTINGS>
>;
}
}

View File

@ -0,0 +1,186 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { Injectable, OnModuleInit } from '@nestjs/common';
import { OnEvent } from '@nestjs/event-emitter';
import Handlebars from 'handlebars';
import { HelperService } from '@/helper/helper.service';
import BaseNlpHelper from '@/helper/lib/base-nlp-helper';
import { LLM, NLU } from '@/helper/types';
import { LanguageService } from '@/i18n/services/language.service';
import { LoggerService } from '@/logger/logger.service';
import { NlpEntityFull } from '@/nlp/schemas/nlp-entity.schema';
import { NlpEntityService } from '@/nlp/services/nlp-entity.service';
import { SettingService } from '@/setting/services/setting.service';
import { LLM_NLU_HELPER_NAME } from './settings';
@Injectable()
export default class LlmNluHelper
extends BaseNlpHelper<typeof LLM_NLU_HELPER_NAME>
implements OnModuleInit
{
private languageClassifierPrompt: string;
/**
* Trait prompts dictionary by id
*/
private traitClassifierPrompts: Array<NlpEntityFull & { prompt: string }>;
constructor(
settingService: SettingService,
helperService: HelperService,
logger: LoggerService,
private readonly languageService: LanguageService,
private readonly nlpEntityService: NlpEntityService,
) {
super(LLM_NLU_HELPER_NAME, settingService, helperService, logger);
}
getPath() {
return __dirname;
}
@OnEvent('hook:language:*')
@OnEvent('hook:llm_nlu_helper:language_classifier_prompt_template')
async buildLanguageClassifierPrompt() {
const settings = await this.getSettings();
if (settings) {
const languages = await this.languageService.findAll();
const delegate = Handlebars.compile(
settings.language_classifier_prompt_template,
);
this.languageClassifierPrompt = delegate({ languages });
}
}
@OnEvent('hook:nlpEntity:*')
@OnEvent('hook:nlpValue:*')
@OnEvent('hook:llm_nlu_helper:trait_classifier_prompt_template')
async buildClassifiersPrompt() {
const settings = await this.getSettings();
if (settings) {
const entities = await this.nlpEntityService.findAndPopulate({
lookups: 'trait',
});
const traitEntities = entities.filter(({ lookups }) =>
lookups.includes('trait'),
);
this.traitClassifierPrompts = traitEntities.map((entity) => ({
...entity,
prompt: Handlebars.compile(settings.trait_classifier_prompt_template)({
entity,
}),
}));
}
}
async onModuleInit() {
super.onModuleInit();
await this.buildLanguageClassifierPrompt();
await this.buildClassifiersPrompt();
}
/**
* Finds entities in a given text based on their values and synonyms.
*
* This function takes a string of text and an array of entities, where each entity contains a value
* and a list of synonyms. It returns an array of objects, each representing an entity found in the text
* along with its start and end positions.
*
* @param text - The input text to search for entities.
* @param entities - An array of entities to search for, each containing a `value` and a list of `synonyms`.
*
* @returns An array of objects representing the found entities, with their `value`, `start`, and `end` positions.
*/
private findKeywordEntities(
text: string,
entity: NlpEntityFull,
): NLU.ParseEntity[] {
return entity.values
.flatMap(({ value, expressions }) => {
const allValues = [value, ...expressions];
// Filter the terms that are found in the text
return allValues
.flatMap((term) => {
const regex = new RegExp(`\\b${term}\\b`, 'g');
const matches = [...text.matchAll(regex)];
// Map matches to FoundEntity format
return matches.map((match) => ({
entity: entity.name,
value: term,
start: match.index!,
end: match.index! + term.length,
confidence: 1,
}));
})
.shift();
})
.filter((v) => !!v);
}
async predict(text: string): Promise<NLU.ParseEntities> {
const settings = await this.getSettings();
const helper = await this.helperService.getDefaultLlmHelper();
const defaultLanguage = await this.languageService.getDefaultLanguage();
// Detect language
const language = await helper.generateStructuredResponse<string>(
`input text: ${text}`,
settings.model,
this.languageClassifierPrompt,
{
type: LLM.ResponseSchemaType.STRING,
description: 'Language of the input text',
},
);
const traits: NLU.ParseEntity[] = [
{
entity: 'language',
value: language || defaultLanguage.code,
confidence: undefined,
},
];
for await (const { name, doc, prompt, values } of this
.traitClassifierPrompts) {
const allowedValues = values.map(({ value }) => value);
const result = await helper.generateStructuredResponse<string>(
`input text: ${text}`,
settings.model,
prompt,
{
type: LLM.ResponseSchemaType.STRING,
description: `${name}${doc ? `: ${doc}` : ''}`,
enum: allowedValues.concat('unknown'),
},
);
const safeValue = result.toLowerCase().trim();
const value = allowedValues.includes(safeValue) ? safeValue : '';
traits.push({
entity: name,
value,
confidence: undefined,
});
}
// Perform slot filling in a deterministic way since
// it's currently a challenging task for the LLMs.
const keywordEntities = await this.nlpEntityService.findAndPopulate({
lookups: 'keywords',
});
const entities = keywordEntities.flatMap((keywordEntity) =>
this.findKeywordEntities(text, keywordEntity),
);
return { entities: traits.concat(entities) };
}
}

View File

@ -0,0 +1,8 @@
{
"name": "hexabot-helper-llm-nlu",
"version": "2.0.0",
"description": "The LLM NLU Helper Extension for Hexabot to enable the Intent Classification and Language Detection",
"dependencies": {},
"author": "Hexastack",
"license": "AGPL-3.0-only"
}

View File

@ -0,0 +1,47 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { HelperSetting } from '@/helper/types';
import { SettingType } from '@/setting/schemas/types';
export const LLM_NLU_HELPER_NAME = 'llm-nlu-helper';
export const LLM_NLU_HELPER_NAMESPACE = 'llm_nlu_helper';
export default [
{
group: LLM_NLU_HELPER_NAMESPACE,
label: 'model',
value: '',
type: SettingType.text,
},
{
group: LLM_NLU_HELPER_NAMESPACE,
label: 'language_classifier_prompt_template',
value: `You are an advanced language detection assistant. Your task is to identify the language of the given input text from the following supported languages:
{{#each languages}}
- {{title}} (code={{code}})
{{/each}}
Provide a concise result by stating the language code only. If the language is not in the supported list, return an empty string.`,
type: SettingType.textarea,
},
{
group: LLM_NLU_HELPER_NAMESPACE,
label: 'trait_classifier_prompt_template',
value: `You are an advanced text classification assistant. Your task is to classify the given input text provided in the following {{entity.name}} values:
{{#each entity.values}}
- {{value}}
{{/each}}
Provide a concise result by stating only the value of the {{entity.name}}. Return an empty string otherwise.`,
type: SettingType.textarea,
},
] as const satisfies HelperSetting<typeof LLM_NLU_HELPER_NAME>[];

View File

@ -10,6 +10,8 @@ import { HttpModule } from '@nestjs/axios';
import { Global, Module } from '@nestjs/common';
import { InjectDynamicProviders } from 'nestjs-dynamic-providers';
import { NlpModule } from '@/nlp/nlp.module';
import { HelperController } from './helper.controller';
import { HelperService } from './helper.service';
@ -23,7 +25,7 @@ import { HelperService } from './helper.service';
'dist/.hexabot/custom/extensions/helpers/**/*.helper.js',
)
@Module({
imports: [HttpModule],
imports: [HttpModule, NlpModule],
controllers: [HelperController],
providers: [HelperService],
exports: [HelperService],

View File

@ -11,7 +11,7 @@ import { LoggerService } from '@/logger/logger.service';
import { SettingService } from '@/setting/services/setting.service';
import { HelperService } from '../helper.service';
import { HelperName, HelperType } from '../types';
import { HelperName, HelperType, LLM } from '../types';
import BaseHelper from './base-helper';
@ -30,7 +30,7 @@ export default abstract class BaseLlmHelper<
}
/**
* Generates a response using LLM
* Generates a text response using LLM
*
* @param prompt - The input text from the user
* @param model - The model to be used
@ -45,6 +45,24 @@ export default abstract class BaseLlmHelper<
extra?: any,
): Promise<string>;
/**
* Generates a structured response using LLM
*
* @param prompt - The input text from the user
* @param model - The model to be used
* @param systemPrompt - The input text from the system
* @param schema - The OpenAPI data schema
* @param extra - Extra options
* @returns {Promise<string>} - The generated response from the LLM
*/
generateStructuredResponse?<T>(
prompt: string,
model: string,
systemPrompt: string,
schema: LLM.ResponseSchema,
extra?: any,
): Promise<T>;
/**
* Send a chat completion request with the conversation history.
* You can use this same approach to start the conversation

View File

@ -23,7 +23,7 @@ import {
import { SettingService } from '@/setting/services/setting.service';
import { HelperService } from '../helper.service';
import { HelperName, HelperType, Nlp } from '../types';
import { HelperName, HelperType, NLU } from '../types';
import BaseHelper from './base-helper';
@ -119,7 +119,7 @@ export default abstract class BaseNlpHelper<
*
* @returns The formatted NLP training set
*/
abstract format(samples: NlpSampleFull[], entities: NlpEntityFull[]): unknown;
format?(samples: NlpSampleFull[], entities: NlpEntityFull[]): unknown;
/**
* Perform training request
@ -129,10 +129,7 @@ export default abstract class BaseNlpHelper<
*
* @returns Training result
*/
abstract train(
samples: NlpSampleFull[],
entities: NlpEntityFull[],
): Promise<any>;
train?(samples: NlpSampleFull[], entities: NlpEntityFull[]): Promise<any>;
/**
* Perform evaluation request
@ -142,10 +139,7 @@ export default abstract class BaseNlpHelper<
*
* @returns NLP evaluation result
*/
abstract evaluate(
samples: NlpSampleFull[],
entities: NlpEntityFull[],
): Promise<any>;
evaluate?(samples: NlpSampleFull[], entities: NlpEntityFull[]): Promise<any>;
/**
* Delete/Forget a sample
@ -154,7 +148,7 @@ export default abstract class BaseNlpHelper<
*
* @returns The deleted sample otherwise an error
*/
async forget(sample: NlpSample): Promise<NlpSample> {
async forget?(sample: NlpSample): Promise<NlpSample> {
return sample;
}
@ -166,10 +160,10 @@ export default abstract class BaseNlpHelper<
*
* @returns NLP Parsed entities
*/
abstract filterEntitiesByConfidence(
filterEntitiesByConfidence?(
nlp: any,
threshold: boolean,
): Promise<Nlp.ParseEntities>;
): Promise<NLU.ParseEntities>;
/**
* Returns only the entities that have strong confidence (> than the threshold), can return an empty result
@ -184,5 +178,5 @@ export default abstract class BaseNlpHelper<
text: string,
threshold?: boolean,
project?: string,
): Promise<Nlp.ParseEntities>;
): Promise<NLU.ParseEntities>;
}

View File

@ -13,12 +13,7 @@ import BaseHelper from './lib/base-helper';
import BaseLlmHelper from './lib/base-llm-helper';
import BaseNlpHelper from './lib/base-nlp-helper';
export namespace Nlp {
export interface Config {
endpoint?: string;
token: string;
}
export namespace NLU {
export interface ParseEntity {
entity: string; // Entity name
value: string; // Value name
@ -32,6 +27,60 @@ export namespace Nlp {
}
}
export namespace LLM {
/**
* Schema is used to define the format of input/output data.
* Represents a select subset of an OpenAPI 3.0 schema object.
* More fields may be added in the future as needed.
* @public
*/
export interface ResponseSchema {
/**
* Optional. The type of the property. {@link
* SchemaType}.
*/
type?: ResponseSchemaType;
/** Optional. The format of the property. */
format?: string;
/** Optional. The description of the property. */
description?: string;
/** Optional. Whether the property is nullable. */
nullable?: boolean;
/** Optional. The items of the property. */
items?: ResponseSchema;
/** Optional. The enum of the property. */
enum?: string[];
/** Optional. Map of {@link Schema}. */
properties?: {
[k: string]: ResponseSchema;
};
/** Optional. Array of required property. */
required?: string[];
/** Optional. The example of the property. */
example?: unknown;
}
/**
* Contains the list of OpenAPI data types
* as defined by https://swagger.io/docs/specification/data-models/data-types/
* @public
*/
export enum ResponseSchemaType {
/** String type. */
STRING = 'string',
/** Number type. */
NUMBER = 'number',
/** Integer type. */
INTEGER = 'integer',
/** Boolean type. */
BOOLEAN = 'boolean',
/** Array type. */
ARRAY = 'array',
/** Object type. */
OBJECT = 'object',
}
}
export enum HelperType {
NLU = 'nlu',
LLM = 'llm',

View File

@ -13,7 +13,7 @@ export const DEFAULT_SETTINGS = [
{
group: 'chatbot_settings',
label: 'default_nlu_helper',
value: 'core-nlu-helper',
value: 'llm-nlu-helper',
type: SettingType.select,
config: {
multiple: false,

View File

@ -6,9 +6,9 @@
* 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file).
*/
import { Nlp } from '@/helper/types';
import { NLU } from '@/helper/types';
export const nlpEntitiesGreeting: Nlp.ParseEntities = {
export const nlpEntitiesGreeting: NLU.ParseEntities = {
entities: [
{
entity: 'intent',