diff --git a/api/src/chat/services/block.service.spec.ts b/api/src/chat/services/block.service.spec.ts index 432bf5e3..cc94f34e 100644 --- a/api/src/chat/services/block.service.spec.ts +++ b/api/src/chat/services/block.service.spec.ts @@ -59,6 +59,7 @@ import { blockProductListMock, blocks, mockNlpAffirmationPatterns, + mockNlpFirstNamePatterns, mockNlpGreetingAnyNamePatterns, mockNlpGreetingNamePatterns, mockNlpGreetingPatterns, @@ -69,6 +70,7 @@ import { subscriberContextBlankInstance, } from '@/utils/test/mocks/conversation'; import { + mockNlpFirstNameEntities, mockNlpGreetingFullNameEntities, mockNlpGreetingNameEntities, } from '@/utils/test/mocks/nlp'; @@ -353,6 +355,25 @@ describe('BlockService', () => { ]); }); + it('should return match nlp patterns with synonyms match (canonical value)', () => { + const result = blockService.getMatchingNluPatterns( + mockNlpFirstNameEntities, + { + ...blockGetStarted, + patterns: [...blockGetStarted.patterns, mockNlpFirstNamePatterns], + }, + ); + expect(result).toEqual([ + [ + { + entity: 'firstname', + match: 'value', + value: 'jhon', + }, + ], + ]); + }); + it('should return empty array when it does not match nlp patterns', () => { const result = blockService.getMatchingNluPatterns( mockNlpGreetingFullNameEntities, diff --git a/api/src/chat/services/block.service.ts b/api/src/chat/services/block.service.ts index 0853d2b2..569c75cc 100644 --- a/api/src/chat/services/block.service.ts +++ b/api/src/chat/services/block.service.ts @@ -294,11 +294,11 @@ export class BlockService extends BaseService< * @returns The NLU patterns that matches the predicted entities */ getMatchingNluPatterns( - nlp: E, + { entities }: E, block: B, ): NlpPattern[][] { // No nlp entities to check against - if (nlp.entities.length === 0) { + if (entities.length === 0) { return []; } @@ -312,18 +312,21 @@ export class BlockService extends BaseService< } // Filter NLP patterns match based on best guessed entities - return nlpPatterns.filter((entities: 
NlpPattern[]) => { - return entities.every((ev: NlpPattern) => { - if (ev.match === 'value') { - return nlp.entities.find((e) => { - return e.entity === ev.entity && e.value === ev.value; + return nlpPatterns.filter((patterns: NlpPattern[]) => { + return patterns.every((p: NlpPattern) => { + if (p.match === 'value') { + return entities.find((e) => { + return ( + e.entity === p.entity && + (e.value === p.value || e.canonicalValue === p.value) + ); }); - } else if (ev.match === 'entity') { - return nlp.entities.find((e) => { - return e.entity === ev.entity; + } else if (p.match === 'entity') { + return entities.find((e) => { + return e.entity === p.entity; }); } else { - this.logger.warn('Unknown NLP match type', ev); + this.logger.warn('Unknown NLP match type', p); return false; } }); @@ -429,12 +432,14 @@ export class BlockService extends BaseService< * - Returns `true` if all conditions are met, otherwise `false`. */ private matchesNluEntity( - { entity, value }: E, + { entity, value, canonicalValue }: E, pattern: NlpPattern, ): boolean { return ( entity === pattern.entity && - (pattern.match !== 'value' || value === pattern.value) + (pattern.match !== 'value' || + value === pattern.value || + canonicalValue === pattern.value) ); } diff --git a/api/src/extensions/helpers/llm-nlu/index.helper.ts b/api/src/extensions/helpers/llm-nlu/index.helper.ts index 9834a49b..625f11b1 100644 --- a/api/src/extensions/helpers/llm-nlu/index.helper.ts +++ b/api/src/extensions/helpers/llm-nlu/index.helper.ts @@ -12,7 +12,7 @@ import Handlebars from 'handlebars'; import { HelperService } from '@/helper/helper.service'; import BaseNlpHelper from '@/helper/lib/base-nlp-helper'; -import { LLM, NLU } from '@/helper/types'; +import { HelperType, LLM, NLU } from '@/helper/types'; import { LanguageService } from '@/i18n/services/language.service'; import { LoggerService } from '@/logger/logger.service'; import { NlpEntityFull } from '@/nlp/schemas/nlp-entity.schema'; @@ -66,12 +66,9 @@ 
export default class LlmNluHelper async buildClassifiersPrompt() { const settings = await this.getSettings(); if (settings) { - const entities = await this.nlpEntityService.findAndPopulate({ + const traitEntities = await this.nlpEntityService.findAndPopulate({ lookups: 'trait', }); - const traitEntities = entities.filter(({ lookups }) => - lookups.includes('trait'), - ); this.traitClassifierPrompts = traitEntities.map((entity) => ({ ...entity, prompt: Handlebars.compile(settings.trait_classifier_prompt_template)({ @@ -88,48 +85,9 @@ export default class LlmNluHelper await this.buildClassifiersPrompt(); } - /** - * Finds entities in a given text based on their values and synonyms. - * - * This function takes a string of text and an array of entities, where each entity contains a value - * and a list of synonyms. It returns an array of objects, each representing an entity found in the text - * along with its start and end positions. - * - * @param text - The input text to search for entities. - * @param entities - An array of entities to search for, each containing a `value` and a list of `synonyms`. - * - * @returns An array of objects representing the found entities, with their `value`, `start`, and `end` positions. - */ - private findKeywordEntities(text: string, entity: NlpEntityFull) { - return ( - entity.values - .flatMap(({ value, expressions }) => { - const allValues = [value, ...expressions]; - - // Filter the terms that are found in the text - return allValues - .flatMap((term) => { - const regex = new RegExp(`\\b${term}\\b`, 'g'); - const matches = [...text.matchAll(regex)]; - - // Map matches to FoundEntity format - return matches.map((match) => ({ - entity: entity.name, - value: term, - start: match.index!, - end: match.index! 
+ term.length, - confidence: 1, - })); - }) - .shift(); - }) - .filter((v) => !!v) || [] - ); - } - async predict(text: string): Promise { const settings = await this.getSettings(); - const helper = await this.helperService.getDefaultLlmHelper(); + const helper = await this.helperService.getDefaultHelper(HelperType.LLM); const defaultLanguage = await this.languageService.getDefaultLanguage(); // Detect language const language = await helper.generateStructuredResponse?.( @@ -174,13 +132,12 @@ export default class LlmNluHelper // Perform slot filling in a deterministic way since // it's currently a challenging task for the LLMs. - const keywordEntities = await this.nlpEntityService.findAndPopulate({ - lookups: 'keywords', + const entities = await this.nlpEntityService.findAndPopulate({ + lookups: { $in: ['keywords', 'pattern'] }, }); - const entities = keywordEntities.flatMap((keywordEntity) => - this.findKeywordEntities(text, keywordEntity), - ) as NLU.ParseEntity[]; - return { entities: traits.concat(entities) }; + const slotEntities = this.runDeterministicSlotFilling(text, entities); + + return { entities: traits.concat(slotEntities) }; } } diff --git a/api/src/helper/lib/__test__/base-nlp-helper.spec.ts b/api/src/helper/lib/__test__/base-nlp-helper.spec.ts index c9509b1d..7cee2ad6 100644 --- a/api/src/helper/lib/__test__/base-nlp-helper.spec.ts +++ b/api/src/helper/lib/__test__/base-nlp-helper.spec.ts @@ -30,6 +30,7 @@ import BaseNlpHelper from '../base-nlp-helper'; const mockLoggerService = { log: jest.fn(), error: jest.fn(), + warn: jest.fn(), } as unknown as LoggerService; const mockSettingService = { @@ -160,7 +161,7 @@ describe('BaseNlpHelper', () => { updatedAt: new Date(), builtin: false, expressions: [], - metadata: [], + metadata: {}, }, value2: { id: new ObjectId().toString(), @@ -170,7 +171,7 @@ describe('BaseNlpHelper', () => { updatedAt: new Date(), builtin: false, expressions: [], - metadata: [], + metadata: {}, }, }); @@ -218,4 +219,253 @@ 
describe('BaseNlpHelper', () => { ); }); }); + + describe('extractKeywordBasedSlots', () => { + it('should return matches for exact keywords and synonyms', () => { + const entity: NlpEntityFull = { + name: 'color', + values: [ + { value: 'blue', expressions: ['azure', 'navy'] }, + { value: 'green', expressions: ['emerald', 'lime'] }, + ], + } as any; + + const result = helper.extractKeywordBasedSlots( + 'The sky is azure and emerald', + entity, + ); + expect(result).toEqual([ + { + entity: 'color', + value: 'blue', + start: 11, + end: 16, + confidence: 1, + }, + { + entity: 'color', + value: 'green', + start: 21, + end: 28, + confidence: 1, + }, + ]); + }); + + it('should return empty array if no values present', () => { + const result = helper.extractKeywordBasedSlots('anything', { + name: 'empty', + values: [], + } as any); + + expect(result).toEqual([]); + }); + }); + + describe('extractPatternBasedSlots', () => { + it('should match using a valid regex pattern', () => { + const entity: NlpEntityFull = { + name: 'infos', + values: [ + { + value: 'number', + metadata: { pattern: '\\d+', wordBoundary: true }, + }, + ], + } as NlpEntityFull; + + const result = helper.extractPatternBasedSlots( + 'Order 123 and 456 now!', + entity, + ); + expect(result).toEqual([ + { + entity: 'infos', + canonicalValue: 'number', + value: '123', + start: 6, + end: 9, + confidence: 1, + }, + { + entity: 'infos', + canonicalValue: 'number', + value: '456', + start: 14, + end: 17, + confidence: 1, + }, + ]); + }); + + it('should respect metadata like toLowerCase and removeSpaces', () => { + const entity: NlpEntityFull = { + name: 'name', + values: [ + { + value: 'brand', + metadata: { + pattern: 'HEX BOT', + toLowerCase: true, + removeSpaces: true, + }, + }, + ], + } as NlpEntityFull; + + const result = helper.extractPatternBasedSlots( + 'My CODE is HEX BOT!', + entity, + ); + expect(result).toEqual([ + { + entity: 'name', + canonicalValue: 'brand', + value: 'hexbot', + start: 11, + end: 
18, + confidence: 1, + }, + ]); + }); + + it('should respect metadata stripDiacritics', () => { + const entity: NlpEntityFull = { + name: 'keyword', + values: [ + { + value: 'word', + metadata: { + pattern: '".+"', + toLowerCase: true, + removeSpaces: true, + stripDiacritics: true, + }, + }, + ], + } as NlpEntityFull; + + const result = helper.extractPatternBasedSlots( + 'The word "où" (where)', + entity, + ); + expect(result).toEqual([ + { + entity: 'keyword', + canonicalValue: 'word', + value: '"ou"', + start: 9, + end: 13, + confidence: 1, + }, + ]); + }); + + it('should return empty array if no values', () => { + const result = helper.extractPatternBasedSlots('test', { + name: 'noop', + values: [], + } as any); + + expect(result).toEqual([]); + }); + + it('should handle invalid regex pattern gracefully', () => { + const entity: NlpEntityFull = { + name: 'fail', + values: [ + { + value: 'Invalid', + metadata: { pattern: '[a-', wordBoundary: true }, + }, + ], + } as any; + + const result = helper.extractPatternBasedSlots('test', entity); + expect(result).toEqual([]); + }); + }); + + describe('runDeterministicSlotFilling', () => { + it('should call keyword-based extractor for keyword lookup strategy', () => { + const mockEntities: NlpEntityFull[] = [ + { + name: 'product', + lookups: ['keywords'], + values: [ + { + value: 'tshirt', + expressions: [], + }, + { + value: 'pizza', + expressions: [], + }, + ], + } as unknown as NlpEntityFull, + ]; + jest.spyOn(helper, 'extractKeywordBasedSlots'); + jest.spyOn(helper, 'extractPatternBasedSlots'); + + const result = helper.runDeterministicSlotFilling( + 'order pizza', + mockEntities, + ); + + expect(helper.extractKeywordBasedSlots).toHaveBeenCalledTimes(1); + expect(helper.extractPatternBasedSlots).not.toHaveBeenCalled(); + expect(result).toHaveLength(1); + expect(result[0].entity).toBe('product'); + }); + + it('should call pattern-based extractor for pattern lookup strategy', () => { + const mockEntities: 
NlpEntityFull[] = [ + { + name: 'number', + lookups: ['pattern'], + values: [ + { + value: 'phone', + metadata: { pattern: '\\d+' }, + expressions: [], + }, + ], + } as unknown as NlpEntityFull, + ]; + + jest.spyOn(helper, 'extractKeywordBasedSlots'); + jest.spyOn(helper, 'extractPatternBasedSlots'); + + const result = helper.runDeterministicSlotFilling( + 'call me at 1234567890', + mockEntities, + ); + + expect(helper.extractPatternBasedSlots).toHaveBeenCalledTimes(1); + expect(helper.extractKeywordBasedSlots).not.toHaveBeenCalled(); + expect(result).toHaveLength(1); + expect(result[0].entity).toBe('number'); + }); + + it('should skip entities that do not support the selected lookup strategy', () => { + const mockEntities: NlpEntityFull[] = [ + { + name: 'irrelevant', + lookups: ['trait'], + values: [], + } as unknown as NlpEntityFull, + ]; + jest.spyOn(helper, 'extractKeywordBasedSlots'); + jest.spyOn(helper, 'extractPatternBasedSlots'); + + const result = helper.runDeterministicSlotFilling( + 'any text', + mockEntities, + ); + + expect(helper.extractKeywordBasedSlots).not.toHaveBeenCalled(); + expect(helper.extractPatternBasedSlots).not.toHaveBeenCalled(); + expect(result).toHaveLength(0); + }); + }); }); diff --git a/api/src/helper/lib/base-nlp-helper.ts b/api/src/helper/lib/base-nlp-helper.ts index 5b0e8310..5c18b433 100644 --- a/api/src/helper/lib/base-nlp-helper.ts +++ b/api/src/helper/lib/base-nlp-helper.ts @@ -225,4 +225,144 @@ export default abstract class BaseNlpHelper< threshold?: boolean, project?: string, ): Promise; + + /** + * Finds entities in a given text based on their values and synonyms. + * + * This function takes a string of text and an array of entities, where each entity contains a value + * and a list of synonyms. It returns an array of objects, each representing an entity found in the text + * along with its start and end positions. + * + * @param text - The input text to search for entities. 
+ * @param entities - An array of entities to search for, each containing a `value` and a list of `synonyms`. + * + * @returns An array of objects representing the found entities, with their `value`, `start`, and `end` positions. + */ + public extractKeywordBasedSlots( + text: string, + entity: NlpEntityFull, + ): NLU.ParseEntity[] { + if (!entity.values?.length) { + this.logger.warn('NLP entity has no values'); + return []; + } + + return (entity.values + .flatMap(({ value, expressions }) => { + const allValues = [value, ...expressions]; + + // Filter the terms that are found in the text + return allValues + .flatMap((term) => { + const regex = new RegExp(`\\b${term}\\b`, 'g'); + const matches = [...text.matchAll(regex)]; + + // Map matches to FoundEntity format + return matches.map((match) => ({ + entity: entity.name, + value, + start: match.index!, + end: match.index! + term.length, + confidence: 1, + })); + }) + .shift(); + }) + .filter((v) => !!v) || []) as NLU.ParseEntity[]; + } + + /** + * Finds entities in a given text based on regex patterns (stored in `value` field). + * + * @param text - Input text to evaluate. + * @param entity - NlpEntityFull with regex values in `value` and optional metadata. + * @returns An array of matched entities with value, position, and confidence. + */ + public extractPatternBasedSlots( + text: string, + entity: NlpEntityFull, + ): NLU.ParseEntity[] { + if (!entity.values?.length) { + this.logger.warn('NLP entity has no values'); + return []; + } + + return (entity.values + .flatMap((nlpValue) => { + const pattern = nlpValue.metadata?.pattern; + + if (!pattern) { + this.logger.error('Missing NLP regex pattern'); + return []; + } + + let regex: RegExp; + try { + const shouldWrap = nlpValue.metadata?.wordBoundary; + regex = new RegExp(shouldWrap ? 
`\\b${pattern}\\b` : pattern, 'gi'); + } catch { + this.logger.error('Invalid NLP regex pattern'); + return []; + } + + const matches = [...text.matchAll(regex)]; + + return matches.map((match) => { + let value = match[0]; + + // Apply preprocessing if needed + if (nlpValue.metadata?.removeSpaces) { + value = value.replace(/\s+/g, ''); + } + + if (nlpValue.metadata?.toLowerCase) { + value = value.toLowerCase(); + } + + if (nlpValue.metadata?.stripDiacritics) { + value = value.normalize('NFD').replace(/\p{Diacritic}/gu, ''); + } + + return { + entity: entity.name, + value, + canonicalValue: nlpValue.value, + start: match.index!, + end: match.index! + match[0].length, + confidence: 1, + }; + }); + }) + .filter((v) => !!v) || []) as NLU.ParseEntity[]; + } + + /** + * Extracts slot values from text based on the specified lookup strategy. + * + * This function supports deterministic slot filling by scanning the input text using either + * keyword-based or pattern-based entity recognition, depending on the provided lookup strategy. + * + * - For `keywords`: It uses exact term and synonym matching with word boundaries. + * - For `pattern`: It uses regular expressions defined in each entity value (stored in `value` field), + * optionally applying preprocessing such as `removeSpaces`, `lowercase`, and `stripDiacritics`. + * + * @param text - The input text from which to extract slot values. + * @param entities - An array of NlpEntityFull objects, each containing slot values and metadata. + * + * @returns An array of `ParseEntity` objects containing the entity name, matched value, position, and confidence. 
+ */ + public runDeterministicSlotFilling( + text: string, + entities: NlpEntityFull[], + ): NLU.ParseEntity[] { + return entities.flatMap((e) => { + if (e.lookups.includes('keywords')) { + return this.extractKeywordBasedSlots(text, e); + } else if (e.lookups.includes('pattern')) { + return this.extractPatternBasedSlots(text, e); + } else { + return []; + } + }); + } } diff --git a/api/src/helper/types.ts b/api/src/helper/types.ts index 4e82e493..fd373f85 100644 --- a/api/src/helper/types.ts +++ b/api/src/helper/types.ts @@ -21,6 +21,9 @@ export namespace NLU { confidence: number; start?: number; end?: number; + // When lookup strategy is either 'keywords' or 'pattern', the canonical value + // is the actual NlpValue.value, given the match is either a synonym (expression) or a pattern match + canonicalValue?: string; } export interface ParseEntities { diff --git a/api/src/nlp/controllers/nlp-value.controller.spec.ts b/api/src/nlp/controllers/nlp-value.controller.spec.ts index be6af201..19e80937 100644 --- a/api/src/nlp/controllers/nlp-value.controller.spec.ts +++ b/api/src/nlp/controllers/nlp-value.controller.spec.ts @@ -95,7 +95,7 @@ describe('NlpValueController', () => { entity: nlpEntities[0].id, value: 'valuetest', expressions: ['synonym1', 'synonym2'], - metadata: { firstkey: 'firstvalue', secondKey: 1995 }, + metadata: {}, builtin: false, doc: '', }; diff --git a/api/src/nlp/controllers/nlp-value.controller.ts b/api/src/nlp/controllers/nlp-value.controller.ts index c9d98078..9c61de51 100644 --- a/api/src/nlp/controllers/nlp-value.controller.ts +++ b/api/src/nlp/controllers/nlp-value.controller.ts @@ -71,14 +71,17 @@ export class NlpValueController extends BaseController< async create( @Body() createNlpValueDto: NlpValueCreateDto, ): Promise { + const nlpEntity = createNlpValueDto.entity + ? await this.nlpEntityService.findOne(createNlpValueDto.entity!) + : null; + this.validate({ dto: createNlpValueDto, allowedIds: { - entity: createNlpValueDto.entity - ? 
(await this.nlpEntityService.findOne(createNlpValueDto.entity))?.id - : null, + entity: nlpEntity?.id, }, }); + return await this.nlpValueService.create(createNlpValueDto); } @@ -171,6 +174,17 @@ export class NlpValueController extends BaseController< @Param('id') id: string, @Body() updateNlpValueDto: NlpValueUpdateDto, ): Promise { + const nlpEntity = updateNlpValueDto.entity + ? await this.nlpEntityService.findOne(updateNlpValueDto.entity!) + : null; + + this.validate({ + dto: updateNlpValueDto, + allowedIds: { + entity: nlpEntity?.id, + }, + }); + return await this.nlpValueService.updateOne(id, updateNlpValueDto); } diff --git a/api/src/nlp/dto/nlp-entity.dto.ts b/api/src/nlp/dto/nlp-entity.dto.ts index d82b6689..c986a707 100644 --- a/api/src/nlp/dto/nlp-entity.dto.ts +++ b/api/src/nlp/dto/nlp-entity.dto.ts @@ -21,7 +21,7 @@ import { import { DtoConfig } from '@/utils/types/dto.types'; -export type Lookup = 'keywords' | 'trait' | 'free-text'; +import { Lookup, LookupStrategy } from '../schemas/types'; export class NlpEntityCreateDto { @ApiProperty({ description: 'Name of the nlp entity', type: String }) @@ -33,10 +33,10 @@ export class NlpEntityCreateDto { @ApiPropertyOptional({ isArray: true, - enum: ['keywords', 'trait', 'free-text'], + enum: Object.values(LookupStrategy), }) @IsArray() - @IsIn(['keywords', 'trait', 'free-text'], { each: true }) + @IsIn(Object.values(LookupStrategy), { each: true }) @IsOptional() lookups?: Lookup[]; diff --git a/api/src/nlp/dto/nlp-value.dto.ts b/api/src/nlp/dto/nlp-value.dto.ts index 5c1c42d9..4f88318e 100644 --- a/api/src/nlp/dto/nlp-value.dto.ts +++ b/api/src/nlp/dto/nlp-value.dto.ts @@ -19,6 +19,8 @@ import { import { DtoConfig } from '@/utils/types/dto.types'; import { IsObjectId } from '@/utils/validation-rules/is-object-id'; +import { NlpMetadata } from '../schemas/types'; + export class NlpValueCreateDto { @ApiProperty({ description: 'Nlp value', type: String }) @IsString() @@ -37,7 +39,7 @@ export class 
NlpValueCreateDto { @ApiPropertyOptional({ description: 'Nlp value metadata', type: Object }) @IsOptional() @IsObject() - metadata?: Record; + metadata?: NlpMetadata; @ApiPropertyOptional({ description: 'Nlp Value Description', type: String }) @IsString() @@ -82,6 +84,11 @@ export class NlpValueUpdateDto { @IsObjectId({ message: 'Entity must be a valid ObjectId' }) entity?: string | null; + @ApiPropertyOptional({ description: 'Nlp Metadata', type: Object }) + @IsObject() + @IsOptional() + metadata?: NlpMetadata; + @ApiPropertyOptional({ description: 'Nlp Value Description', type: String }) @IsString() @IsOptional() diff --git a/api/src/nlp/schemas/nlp-entity.schema.ts b/api/src/nlp/schemas/nlp-entity.schema.ts index a5879d5d..d0058594 100644 --- a/api/src/nlp/schemas/nlp-entity.schema.ts +++ b/api/src/nlp/schemas/nlp-entity.schema.ts @@ -16,10 +16,8 @@ import { THydratedDocument, } from '@/utils/types/filter.types'; -import { Lookup } from '../dto/nlp-entity.dto'; - import { NlpValue } from './nlp-value.schema'; -import { NlpEntityMap } from './types'; +import { Lookup, LookupStrategy, NlpEntityMap } from './types'; @Schema({ timestamps: true }) export class NlpEntityStub extends BaseSchema { @@ -41,9 +39,18 @@ export class NlpEntityStub extends BaseSchema { name: string; /** - * Lookup strategy can contain : keywords, trait, free-text + * Lookup strategy */ - @Prop({ type: [String], default: ['keywords'] }) + @Prop({ + type: [String], + default: ['keywords'], + validate: { + validator: (lookups: string[]) => + lookups.every((lookup) => + Object.values(LookupStrategy).includes(lookup as LookupStrategy), + ), + }, + }) lookups: Lookup[]; /** diff --git a/api/src/nlp/schemas/nlp-value.schema.ts b/api/src/nlp/schemas/nlp-value.schema.ts index a49eea5c..aef7149a 100644 --- a/api/src/nlp/schemas/nlp-value.schema.ts +++ b/api/src/nlp/schemas/nlp-value.schema.ts @@ -19,9 +19,9 @@ import { import { TStubOrFull } from '@/utils/types/format.types'; import { NlpEntity, 
NlpEntityFull } from './nlp-entity.schema'; -import { NlpValueMap } from './types'; +import { NlpMetadata, NlpValueMap } from './types'; -@Schema({ timestamps: true }) +@Schema({ timestamps: true, minimize: false }) export class NlpValueStub extends BaseSchema { /** * This value content. @@ -44,8 +44,8 @@ export class NlpValueStub extends BaseSchema { /** * Metadata are additional data that can be associated to this values, most of the time, the metadata contains system values or ids (e.g: value: "coffee", metadata: "item_11") . */ - @Prop({ type: JSON, default: {} }) - metadata: Record; + @Prop({ type: JSON, default: () => ({}) }) + metadata?: NlpMetadata; /** * Description of the entity's value purpose. diff --git a/api/src/nlp/schemas/types.ts b/api/src/nlp/schemas/types.ts index 6e87dee3..d8408bdf 100644 --- a/api/src/nlp/schemas/types.ts +++ b/api/src/nlp/schemas/types.ts @@ -9,6 +9,15 @@ import { NlpEntityFull, NlpEntityStub } from './nlp-entity.schema'; import { NlpValueStub } from './nlp-value.schema'; +export enum LookupStrategy { + keywords = 'keywords', + trait = 'trait', + free_text = 'free-text', + pattern = 'pattern', +} + +export type Lookup = `${LookupStrategy}`; + export interface NlpSampleEntityValue { entity: string; // entity name value: string; // entity value @@ -27,3 +36,12 @@ } export type NlpCacheMap = Map; + +export type NlpMetadata = { + // Required when lookups is "pattern" + pattern?: string; + wordBoundary?: boolean; + removeSpaces?: boolean; + toLowerCase?: boolean; + stripDiacritics?: boolean; +}; diff --git a/api/src/nlp/services/nlp-entity.service.ts b/api/src/nlp/services/nlp-entity.service.ts index 0f38c920..9e6025f2 100644 --- a/api/src/nlp/services/nlp-entity.service.ts +++ b/api/src/nlp/services/nlp-entity.service.ts @@ -15,14 +15,14 @@ import { NLP_MAP_CACHE_KEY } from '@/utils/constants/cache'; import { Cacheable } from '@/utils/decorators/cacheable.decorator'; import { BaseService } from
'@/utils/generics/base-service'; -import { Lookup, NlpEntityDto } from '../dto/nlp-entity.dto'; +import { NlpEntityDto } from '../dto/nlp-entity.dto'; import { NlpEntityRepository } from '../repositories/nlp-entity.repository'; import { NlpEntity, NlpEntityFull, NlpEntityPopulate, } from '../schemas/nlp-entity.schema'; -import { NlpCacheMap, NlpSampleEntityValue } from '../schemas/types'; +import { Lookup, NlpCacheMap, NlpSampleEntityValue } from '../schemas/types'; import { NlpValueService } from './nlp-value.service'; diff --git a/api/src/nlp/services/nlp.service.ts b/api/src/nlp/services/nlp.service.ts index 6a779648..1c2c1f34 100644 --- a/api/src/nlp/services/nlp.service.ts +++ b/api/src/nlp/services/nlp.service.ts @@ -51,6 +51,7 @@ export class NlpService { .filter(({ entity }) => nlpMap.has(entity)) .map((e) => { const entity = nlpMap.get(e.entity)!; + return { ...e, score: e.confidence * (entity.weight || 1), diff --git a/api/src/utils/test/mocks/block.ts b/api/src/utils/test/mocks/block.ts index c1c45611..32a26f70 100644 --- a/api/src/utils/test/mocks/block.ts +++ b/api/src/utils/test/mocks/block.ts @@ -294,6 +294,14 @@ export const mockNlpGreetingAnyNamePatterns: NlpPattern[] = [ }, ]; +export const mockNlpFirstNamePatterns: NlpPattern[] = [ + { + entity: 'firstname', + match: 'value', + value: 'jhon', + }, +]; + export const mockModifiedNlpBlock: BlockFull = { ...baseBlockInstance, name: 'Modified Mock Nlp', diff --git a/api/src/utils/test/mocks/nlp.ts b/api/src/utils/test/mocks/nlp.ts index f7f3b5ca..d7a99011 100644 --- a/api/src/utils/test/mocks/nlp.ts +++ b/api/src/utils/test/mocks/nlp.ts @@ -44,3 +44,14 @@ export const mockNlpGreetingFullNameEntities: NLU.ParseEntities = { }, ], }; + +export const mockNlpFirstNameEntities: NLU.ParseEntities = { + entities: [ + { + entity: 'firstname', + value: 'jhonny', + canonicalValue: 'jhon', + confidence: 0.75, + }, + ], +}; diff --git a/frontend/public/locales/en/translation.json 
b/frontend/public/locales/en/translation.json index f3633259..e3ed3a75 100644 --- a/frontend/public/locales/en/translation.json +++ b/frontend/public/locales/en/translation.json @@ -351,6 +351,10 @@ "doc": "Documentation", "builtin": "Built-in?", "weight": "Weight", + "word_boundary": "Word boundary", + "remove_spaces": "Remove spaces", + "to_lower_case": "Lowercase", + "strip_diacritics": "Strip diacritics", "dataset": "Dataset", "yes": "Yes", "no": "No", diff --git a/frontend/public/locales/fr/translation.json b/frontend/public/locales/fr/translation.json index 5ff646ea..ad69c256 100644 --- a/frontend/public/locales/fr/translation.json +++ b/frontend/public/locales/fr/translation.json @@ -350,6 +350,10 @@ "synonyms": "Synonymes", "doc": "Documentation", "weight": "Poids", + "word_boundary": "Délimiter (Mot)", + "remove_spaces": "Supprimer les espaces", + "to_lower_case": "Mettre en minuscules", + "strip_diacritics": "Supprimer les accents", "builtin": "Intégré?", "dataset": "Données", "yes": "Oui", diff --git a/frontend/src/app-components/inputs/RegexInput.tsx b/frontend/src/app-components/inputs/RegexInput.tsx index 976c0474..20c3cc2f 100644 --- a/frontend/src/app-components/inputs/RegexInput.tsx +++ b/frontend/src/app-components/inputs/RegexInput.tsx @@ -1,5 +1,5 @@ /* - * Copyright © 2024 Hexastack. All rights reserved. + * Copyright © 2025 Hexastack. All rights reserved. * * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms: * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@@ -7,17 +7,18 @@ */ import { InputAdornment, TextFieldProps } from "@mui/material"; -import React, { ForwardedRef, forwardRef } from "react"; +import { ForwardedRef, forwardRef } from "react"; import { Input } from "./Input"; export const RegexInput = forwardRef( ( { - onChange, - value, + flags = ["g", "i"], ...props - }: TextFieldProps & { value: string; onChange: (value: string) => void }, + }: TextFieldProps & { + flags?: string[]; + }, ref: ForwardedRef, ) => { return ( @@ -26,15 +27,13 @@ export const RegexInput = forwardRef( {...props} InputProps={{ startAdornment: /, - endAdornment: /gi, - }} - value={value} - onChange={(e) => { - onChange(`/${e.target.value}/`); + endAdornment: ( + /{flags.join("")} + ), }} /> ); }, ); -RegexInput.displayName = "Input"; +RegexInput.displayName = "RegexInput"; diff --git a/frontend/src/app-components/inputs/Selectable.tsx b/frontend/src/app-components/inputs/Selectable.tsx index 70fac37b..09ab6197 100644 --- a/frontend/src/app-components/inputs/Selectable.tsx +++ b/frontend/src/app-components/inputs/Selectable.tsx @@ -6,11 +6,22 @@ * 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file). 
*/ -import { Box, CircularProgress, Input, styled } from "@mui/material"; +import { Box, CircularProgress, Input, styled, Tooltip } from "@mui/material"; import randomSeed from "random-seed"; -import { FC, useCallback, useEffect, useMemo, useRef, useState } from "react"; +import { + CSSProperties, + FC, + useCallback, + useEffect, + useMemo, + useRef, + useState, +} from "react"; -import { INlpDatasetKeywordEntity } from "../../types/nlp-sample.types"; +import { + INlpDatasetKeywordEntity, + INlpDatasetPatternEntity, +} from "../../types/nlp-sample.types"; const SelectableBox = styled(Box)({ position: "relative", @@ -40,22 +51,62 @@ const COLORS = [ { name: "orange", bg: "#E6A23C" }, ]; const UNKNOWN_COLOR = { name: "grey", bg: "#aaaaaa" }; -const TODAY = new Date().toDateString(); -const getColor = (no: number) => { - const rand = randomSeed.create(TODAY); +const NOW = (+new Date()).toString(); +const getColor = (no: number, seedPrefix: string = "") => { + const rand = randomSeed.create(seedPrefix + NOW); const startIndex = rand(COLORS.length); const color = no < 0 ? UNKNOWN_COLOR : COLORS[(startIndex + no) % COLORS.length]; return { backgroundColor: color.bg, - opacity: 0.3, + opacity: 0.2, }; }; +interface INlpSelectionEntity { + start: string; + entity: string; + value: string; + end: string; + style: CSSProperties; +} +const SelectionEntityBackground: React.FC<{ + selectionEntity: INlpSelectionEntity; +}> = ({ selectionEntity: e }) => { + return ( +
+ {e.start} + + {e.value} + + {e.end} +
+ ); +}; + type SelectableProps = { defaultValue?: string; - entities?: INlpDatasetKeywordEntity[]; + keywordEntities?: INlpDatasetKeywordEntity[]; + patternEntities?: INlpDatasetPatternEntity[]; placeholder?: string; onSelect: (str: string, start: number, end: number) => void; onChange: (sample: { @@ -65,9 +116,27 @@ type SelectableProps = { loading?: boolean; }; +const buildSelectionEntities = ( + text: string, + entities: INlpDatasetKeywordEntity[] | INlpDatasetPatternEntity[], +): INlpSelectionEntity[] => { + return entities?.map((e, index) => { + const start = e.start ? e.start : text.indexOf(e.value); + const end = e.end ? e.end : start + e.value.length; + + return { + start: text.substring(0, start), + entity: e.entity, + value: text.substring(start, end), + end: text.substring(end), + style: getColor(e.entity ? index : -1, e.entity), + }; + }); +}; const Selectable: FC = ({ defaultValue, - entities = [], + keywordEntities = [], + patternEntities = [], placeholder = "", onChange, onSelect, @@ -76,20 +145,13 @@ const Selectable: FC = ({ const [text, setText] = useState(defaultValue || ""); const editableRef = useRef(null); const selectableRef = useRef(null); - const selectedEntities = useMemo( - () => - entities?.map((e, index) => { - const start = e.start ? e.start : text.indexOf(e.value); - const end = e.end ? e.end : start + e.value.length; - - return { - start: text.substring(0, start), - value: text.substring(start, end), - end: text.substring(end), - style: getColor(e.entity ? 
index : -1), - }; - }), - [entities, text], + const selectedKeywordEntities = useMemo( + () => buildSelectionEntities(text, keywordEntities), + [keywordEntities, text], + ); + const selectedPatternEntities = useMemo( + () => buildSelectionEntities(text, patternEntities), + [patternEntities, text], ); useEffect(() => { @@ -143,7 +205,7 @@ const Selectable: FC = ({ const handleTextChange = useCallback( (newText: string) => { const oldText = text; - const oldEntities = [...entities]; + const oldEntities = [...keywordEntities]; const newEntities: INlpDatasetKeywordEntity[] = []; const findCharDiff = (oldStr: string, newStr: string): number => { const minLength = Math.min(oldStr.length, newStr.length); @@ -187,17 +249,22 @@ const Selectable: FC = ({ onChange({ text: newText, entities: newEntities }); }, - [text, onChange, entities], + [text, onChange, keywordEntities], ); return ( - {selectedEntities?.map((e, idx) => ( -
- {e.start} - {e.value} - {e.end} -
+ {selectedPatternEntities?.map((e, idx) => ( + + ))} + {selectedKeywordEntities?.map((e, idx) => ( + ))} > = ({ @@ -94,27 +94,30 @@ export const NlpEntityVarForm: FC> = ({
- {!nlpEntity ? ( - - - {t("label.lookup_strategies")} - - {Object.values(NlpLookups).map((nlpLookup, index) => ( - } - label={nlpLookup} - /> - ))} - - - - ) : null} + + + {t("label.lookup_strategies")} + + {Object.values(LookupStrategy).map((nlpLookup, index) => ( + + } + label={nlpLookup} + /> + ))} + + + > = ({ label={t("label.doc")} {...register("doc")} multiline={true} + rows={3} disabled={nlpEntity?.builtin} /> - + > = ({ }} error={!!errors.weight} helperText={errors.weight?.message} - /> - + /> +
diff --git a/frontend/src/components/nlp/components/NlpTrainForm.tsx b/frontend/src/components/nlp/components/NlpTrainForm.tsx index 470124a3..fb9873de 100644 --- a/frontend/src/components/nlp/components/NlpTrainForm.tsx +++ b/frontend/src/components/nlp/components/NlpTrainForm.tsx @@ -30,15 +30,16 @@ import { ContentContainer, ContentItem } from "@/app-components/dialogs"; import AutoCompleteEntitySelect from "@/app-components/inputs/AutoCompleteEntitySelect"; import AutoCompleteSelect from "@/app-components/inputs/AutoCompleteSelect"; import Selectable from "@/app-components/inputs/Selectable"; -import { useFind } from "@/hooks/crud/useFind"; import { useGetFromCache } from "@/hooks/crud/useGet"; import { useApiClient } from "@/hooks/useApiClient"; +import { useNlp } from "@/hooks/useNlp"; import { useTranslate } from "@/hooks/useTranslate"; import { EntityType, Format } from "@/services/types"; import { ILanguage } from "@/types/language.types"; import { INlpEntity } from "@/types/nlp-entity.types"; import { INlpDatasetKeywordEntity, + INlpDatasetPatternEntity, INlpDatasetSample, INlpDatasetTraitEntity, INlpSampleFormAttributes, @@ -56,39 +57,32 @@ const NlpDatasetSample: FC = ({ submitForm, }) => { const { t } = useTranslate(); - const { data: entities, refetch: refetchEntities } = useFind( - { - entity: EntityType.NLP_ENTITY, - format: Format.FULL, - }, - { - hasCount: false, - }, - ); + const { + allTraitEntities, + allKeywordEntities, + allPatternEntities, + refetchAllEntities, + } = useNlp(); const getNlpValueFromCache = useGetFromCache(EntityType.NLP_VALUE); - // eslint-disable-next-line react-hooks/exhaustive-deps const defaultValues: INlpSampleFormAttributes = useMemo( () => ({ type: sample?.type || NlpSampleType.train, text: sample?.text || "", language: sample?.language || null, - traitEntities: (entities || []) - .filter(({ lookups }) => { - return lookups.includes("trait"); - }) - .map((e) => { - return { - entity: e.name, - value: sample - ? 
sample.entities.find(({ entity }) => entity === e.name)?.value - : "", - } as INlpDatasetTraitEntity; - }), - keywordEntities: (sample?.entities || []).filter( - (e) => "start" in e && typeof e.start === "number", + traitEntities: [...allTraitEntities.values()].map((e) => { + return { + entity: e.name, + value: + (sample?.entities || []).find((se) => se.entity === e.name) + ?.value || "", + }; + }) as INlpDatasetTraitEntity[], + keywordEntities: (sample?.entities || []).filter((e) => + allKeywordEntities.has(e.entity), ) as INlpDatasetKeywordEntity[], }), - [sample, entities], + // eslint-disable-next-line react-hooks/exhaustive-deps + [allKeywordEntities, allTraitEntities, JSON.stringify(sample)], ); const { handleSubmit, control, register, reset, setValue, watch } = useForm({ @@ -97,6 +91,9 @@ const NlpDatasetSample: FC = ({ const currentText = watch("text"); const currentType = watch("type"); const { apiClient } = useApiClient(); + const [patternEntities, setPatternEntities] = useState< + INlpDatasetPatternEntity[] + >([]); const { fields: traitEntities, update: updateTraitEntity } = useFieldArray({ control, name: "traitEntities", @@ -122,22 +119,29 @@ const NlpDatasetSample: FC = ({ queryFn: async () => { return await apiClient.predictNlp(currentText); }, - onSuccess: (result) => { - const traitEntities: INlpDatasetTraitEntity[] = result.entities.filter( - (e) => !("start" in e && "end" in e) && e.entity !== "language", - ); - const keywordEntities = result.entities.filter( - (e) => "start" in e && "end" in e, + onSuccess: (prediction) => { + const predictedTraitEntities: INlpDatasetTraitEntity[] = + prediction.entities.filter((e) => allTraitEntities.has(e.entity)); + const predictedKeywordEntities = prediction.entities.filter((e) => + allKeywordEntities.has(e.entity), ) as INlpDatasetKeywordEntity[]; - const language = result.entities.find( + const predictedPatternEntities = prediction.entities.filter((e) => + allPatternEntities.has(e.entity), + ) as 
INlpDatasetKeywordEntity[]; + const language = prediction.entities.find( ({ entity }) => entity === "language", ); setValue("language", language?.value || ""); - setValue("traitEntities", traitEntities); - setValue("keywordEntities", keywordEntities); + setValue("traitEntities", predictedTraitEntities); + setValue("keywordEntities", predictedKeywordEntities); + setPatternEntities(predictedPatternEntities); }, - enabled: !sample && !!currentText, + enabled: + // Inbox sample update + sample?.type === "inbox" || + // New sample + (!sample && !!currentText), }); const findInsertIndex = (newItem: INlpDatasetKeywordEntity): number => { const index = keywordEntities.findIndex( @@ -153,7 +157,7 @@ const NlpDatasetSample: FC = ({ } | null>(null); const onSubmitForm = (form: INlpSampleFormAttributes) => { submitForm(form); - refetchEntities(); + refetchAllEntities(); reset({ ...defaultValues, text: "", @@ -203,7 +207,8 @@ const NlpDatasetSample: FC = ({ { setSelection({ @@ -223,11 +228,13 @@ const NlpDatasetSample: FC = ({ end, })), ); + setPatternEntities([]); }} loading={isLoading} /> + {/* Language selection */} = ({ }} /> + {/* Trait entities */} {traitEntities.map((traitEntity, index) => ( = ({ control={control} render={({ field }) => { const { onChange: _, value, ...rest } = field; - const entity = entities?.find( - ({ name }) => name === traitEntity.entity, - ); - const options = - entity?.values.map( - (v) => getNlpValueFromCache(v) as INlpValue, - ) || []; + const options = ( + allTraitEntities.get(traitEntity.entity)?.values || [] + ).map((v) => getNlpValueFromCache(v)!); return ( <> @@ -318,7 +322,9 @@ const NlpDatasetSample: FC = ({ ))} - + { + /* Keyword entities */ + } {keywordEntities.map((keywordEntity, index) => ( = ({ control={control} render={({ field }) => { const { onChange: _, ...rest } = field; + const options = [...allKeywordEntities.values()]; return ( - + fullWidth={true} - searchFields={["name"]} - entity={EntityType.NLP_ENTITY} - 
format={Format.FULL} + options={options} idKey="name" labelKey="name" label={t("label.nlp_entity")} multiple={false} - preprocess={(options) => { - return options.filter(({ lookups }) => - lookups.includes("keywords"), - ); - }} onChange={(_e, selected, ..._) => { updateKeywordEntity(index, { ...keywordEntities[index], @@ -367,13 +367,9 @@ const NlpDatasetSample: FC = ({ control={control} render={({ field }) => { const { onChange: _, value, ...rest } = field; - const entity = entities?.find( - ({ name }) => name === keywordEntity.entity, - ); - const options = - entity?.values.map( - (v) => getNlpValueFromCache(v) as INlpValue, - ) || []; + const options = ( + allKeywordEntities.get(keywordEntity.entity)?.values || [] + ).map((v) => getNlpValueFromCache(v)!); return ( { + if (nlpEntity?.lookups.includes(LookupStrategy.pattern)) { + return { + pattern: "", + wordBoundary: true, + removeSpaces: false, + toLowerCase: false, + stripDiacritics: false, + }; + } else { + return {}; + } +}; export const NlpValueForm: FC> = ({ data: { defaultValues: nlpValue, presetValues: nlpEntity }, @@ -36,7 +59,8 @@ export const NlpValueForm: FC> = ({ entity: EntityType.NLP_ENTITY, format: Format.FULL, }); - const canHaveSynonyms = nlpEntity?.lookups.includes(NlpLookups.keywords); + const canHaveSynonyms = nlpEntity?.lookups.includes(LookupStrategy.keywords); + const isPattern = nlpEntity?.lookups.includes(LookupStrategy.pattern); const { mutate: createNlpValue } = useCreate(EntityType.NLP_VALUE, { onError: () => { rest.onError?.(); @@ -73,15 +97,9 @@ export const NlpValueForm: FC> = ({ value: nlpValue?.value || "", doc: nlpValue?.doc || "", expressions: nlpValue?.expressions || [], + metadata: nlpValue?.metadata || getDefaultNlpMetadata(nlpEntity), }, }); - const validationRules = { - value: { - required: t("message.value_is_required"), - }, - name: {}, - description: {}, - }; const onSubmitForm = async (params: INlpValueAttributes) => { if (nlpValue) { updateNlpValue({ id: 
nlpValue.id, params }); @@ -96,11 +114,17 @@ export const NlpValueForm: FC> = ({ value: nlpValue.value, expressions: nlpValue.expressions, doc: nlpValue.doc, + metadata: nlpValue.metadata, }); } else { - reset(); + reset({ + value: "", + expressions: [], + doc: "", + metadata: getDefaultNlpMetadata(nlpEntity), + }); } - }, [nlpValue, reset]); + }, [nlpValue, nlpEntity, reset]); return ( @@ -112,15 +136,87 @@ export const NlpValueForm: FC> = ({ error={!!errors.value} required autoFocus - helperText={errors.value ? errors.value.message : null} - {...register("value", validationRules.value)} + helperText={errors.value?.message} + {...register("value", { + required: t("message.value_is_required"), + })} /> + {isPattern && ( + <> + + { + return isRegex(pattern) + ? true + : t("message.regex_is_invalid"); + }, + })} + helperText={errors.metadata?.pattern?.message} + error={!!errors.metadata?.pattern} + label={t("label.regex")} + placeholder={t("placeholder.pattern")} + flags={["i"]} + /> + + + ( + } + label={t("label.word_boundary")} + /> + )} + /> + + + ( + } + label={t("label.remove_spaces")} + /> + )} + /> + + + ( + } + label={t("label.to_lower_case")} + /> + )} + /> + + + ( + } + label={t("label.strip_diacritics")} + /> + )} + /> + + + )} diff --git a/frontend/src/components/visual-editor/form/inputs/triggers/PatternInput.tsx b/frontend/src/components/visual-editor/form/inputs/triggers/PatternInput.tsx index 753a12c6..8d0b6b3f 100644 --- a/frontend/src/components/visual-editor/form/inputs/triggers/PatternInput.tsx +++ b/frontend/src/components/visual-editor/form/inputs/triggers/PatternInput.tsx @@ -22,15 +22,18 @@ import { PatternType, PayloadPattern, } from "@/types/block.types"; +import { + extractRegexBody, + formatWithSlashes, + isRegex, + isRegexString, +} from "@/utils/string"; import { OutcomeInput } from "./OutcomeInput"; import { PostbackInput } from "./PostbackInput"; -const isRegex = (str: Pattern) => { - return typeof str === "string" && 
str.startsWith("/") && str.endsWith("/"); -}; -const getType = (pattern: Pattern): PatternType => { - if (isRegex(pattern)) { +const getPatternType = (pattern: Pattern): PatternType => { + if (isRegexString(pattern)) { return "regex"; } else if (Array.isArray(pattern)) { return "nlp"; @@ -69,7 +72,7 @@ const PatternInput: FC = ({ formState: { errors }, } = useFormContext(); const [pattern, setPattern] = useState(value); - const patternType = getType(value); + const patternType = getPatternType(value); const registerInput = ( errorMessage: string, idx: number, @@ -122,23 +125,15 @@ const PatternInput: FC = ({ { - try { - const parsedPattern = new RegExp(pattern.slice(1, -1)); - - if (String(parsedPattern) !== pattern) { - throw t("message.regex_is_invalid"); - } - - return true; - } catch (_e) { - return t("message.regex_is_invalid"); - } + return isRegex(extractRegexBody(pattern)) + ? true + : t("message.regex_is_invalid"); }, - setValueAs: (v) => (isRegex(v) ? v : `/${v}/`), + setValueAs: (v) => (isRegexString(v) ? v : formatWithSlashes(v)), })} + value={extractRegexBody(value)} label={t("label.regex")} - value={value.slice(1, -1)} - onChange={(v) => onChange(v)} + onChange={(e) => onChange(formatWithSlashes(e.target.value))} required /> ) : null} diff --git a/frontend/src/hooks/useNlp.tsx b/frontend/src/hooks/useNlp.tsx new file mode 100644 index 00000000..99220771 --- /dev/null +++ b/frontend/src/hooks/useNlp.tsx @@ -0,0 +1,55 @@ +/* + * Copyright © 2025 Hexastack. All rights reserved. + * + * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms: + * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission. + * 2. All derivative works must include clear attribution to the original creator and software, Hexastack and Hexabot, in a prominent location (e.g., in the software's "About" section, documentation, and README file). 
+ */ + +import { useMemo } from "react"; + +import { EntityType, Format } from "@/services/types"; +import { INlpEntity, Lookup } from "@/types/nlp-entity.types"; + +import { useFind } from "./crud/useFind"; + +const buildNlpEntityMap = (entities: INlpEntity[], lookup: Lookup) => { + const intialMap = new Map(); + + return entities + .filter(({ lookups }) => { + return lookups.includes(lookup); + }).reduce((acc, curr) => { + acc.set(curr.name, curr); + + return acc; + }, intialMap) +} + +export const useNlp = () => { + const { data: allEntities, refetch: refetchAllEntities } = useFind( + { + entity: EntityType.NLP_ENTITY, + format: Format.FULL, + }, + { + hasCount: false, + }, + ); + const allTraitEntities = useMemo(() => { + return buildNlpEntityMap((allEntities || []), 'trait') + }, [allEntities]); + const allKeywordEntities = useMemo(() => { + return buildNlpEntityMap((allEntities || []), 'keywords') + }, [allEntities]); + const allPatternEntities = useMemo(() => { + return buildNlpEntityMap((allEntities || []), 'pattern') + }, [allEntities]); + + return { + allTraitEntities, + allKeywordEntities, + allPatternEntities, + refetchAllEntities + } +}; diff --git a/frontend/src/types/nlp-entity.types.ts b/frontend/src/types/nlp-entity.types.ts index 97c0ddce..b7de9313 100644 --- a/frontend/src/types/nlp-entity.types.ts +++ b/frontend/src/types/nlp-entity.types.ts @@ -11,7 +11,23 @@ import { EntityType, Format } from "@/services/types"; import { IBaseSchema, IFormat, OmitPopulate } from "./base.types"; import { INlpValue } from "./nlp-value.types"; -export type Lookup = "keywords" | "trait" | "free-text"; +export enum LookupStrategy { + keywords = "keywords", + trait = "trait", + // free_text = "free-text", + pattern = "pattern", +} + +export type Lookup = `${LookupStrategy}`; + +export interface INlpMetadata { + // Required when lookups is "pattern" + pattern?: string; + wordBoundary?: boolean; + removeSpaces?: boolean; + toLowerCase?: boolean; + stripDiacritics?: 
boolean; +} export interface INlpEntityAttributes { foreign_id?: string; @@ -22,11 +38,6 @@ export interface INlpEntityAttributes { weight?: number; } -export enum NlpLookups { - keywords = "keywords", - trait = "trait", -} - export interface INlpEntityStub extends IBaseSchema, OmitPopulate {} diff --git a/frontend/src/types/nlp-sample.types.ts b/frontend/src/types/nlp-sample.types.ts index 8069b95a..1884ce85 100644 --- a/frontend/src/types/nlp-sample.types.ts +++ b/frontend/src/types/nlp-sample.types.ts @@ -1,5 +1,5 @@ /* - * Copyright © 2024 Hexastack. All rights reserved. + * Copyright © 2025 Hexastack. All rights reserved. * * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms: * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission. @@ -52,6 +52,8 @@ export interface INlpDatasetKeywordEntity extends INlpDatasetTraitEntity { end: number; } +export interface INlpDatasetPatternEntity extends INlpDatasetKeywordEntity {} + export interface INlpSampleFormAttributes extends Omit { traitEntities: INlpDatasetTraitEntity[]; diff --git a/frontend/src/types/nlp-value.types.ts b/frontend/src/types/nlp-value.types.ts index 4986ee8d..7b0158c9 100644 --- a/frontend/src/types/nlp-value.types.ts +++ b/frontend/src/types/nlp-value.types.ts @@ -9,7 +9,7 @@ import { Format } from "@/services/types"; import { IBaseSchema, IFormat } from "./base.types"; -import { INlpEntity } from "./nlp-entity.types"; +import { INlpEntity, INlpMetadata } from "./nlp-entity.types"; export interface INlpValueAttributes { entity: string; @@ -17,7 +17,7 @@ export interface INlpValueAttributes { value: string; doc?: string; expressions?: string[]; - metadata?: Record; + metadata?: INlpMetadata; builtin?: boolean; nlpSamplesCount?: number; } diff --git a/frontend/src/utils/string.ts b/frontend/src/utils/string.ts index 77fd41d4..3600b89b 100644 --- 
a/frontend/src/utils/string.ts +++ b/frontend/src/utils/string.ts @@ -1,5 +1,5 @@ /* - * Copyright © 2024 Hexastack. All rights reserved. + * Copyright © 2025 Hexastack. All rights reserved. * * Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms: * 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission. @@ -18,3 +18,49 @@ export const slugify = (str: string) => { export const getNamespace = (extensionName: string) => { return extensionName.replaceAll("-", "_"); }; + +/** + * Checks if the string starts/ends with slashes + */ +export const isRegexString = (str: any) => { + return typeof str === "string" && str.startsWith("/") && str.endsWith("/"); +}; + +/** + * Ensures value is wrapped in slashes: /value/ + */ +export const formatWithSlashes = (value: string): string => { + if (!value) return "/"; + if (!value.startsWith("/")) value = "/" + value; + if (!value.endsWith("/")) value = value + "/"; + + return value; +}; + +/** + * Extracts the inner regex from /.../ + */ +export const extractRegexBody = (value: string | undefined): string => { + if (value && value.startsWith("/") && value.endsWith("/")) { + return value.slice(1, -1); + } + + return ''; +}; + +/** + * Checks if the regex pattern compiles correctly + */ +export const isRegex = (pattern: string | undefined) => { + try { + if (!pattern) { + throw new Error("Pattern was not provided!"); + } + + new RegExp(pattern); + + return true; + } catch { + return false; + } +};