From e85084cd4101a2799bb33e2129349f62d4144fef Mon Sep 17 00:00:00 2001 From: Mohamed Marrouchi Date: Tue, 13 May 2025 17:07:28 +0100 Subject: [PATCH] fix: add canonical value check --- api/src/chat/services/block.service.spec.ts | 21 +++++++++++++++++ api/src/chat/services/block.service.ts | 23 +++++++++++-------- .../lib/__test__/base-nlp-helper.spec.ts | 20 +++++++++------- api/src/helper/lib/base-nlp-helper.ts | 13 ++++++----- api/src/helper/types.ts | 3 +++ api/src/nlp/services/nlp.service.ts | 1 + api/src/utils/test/mocks/block.ts | 8 +++++++ api/src/utils/test/mocks/nlp.ts | 11 +++++++++ 8 files changed, 77 insertions(+), 23 deletions(-) diff --git a/api/src/chat/services/block.service.spec.ts b/api/src/chat/services/block.service.spec.ts index 432bf5e3..cc94f34e 100644 --- a/api/src/chat/services/block.service.spec.ts +++ b/api/src/chat/services/block.service.spec.ts @@ -59,6 +59,7 @@ import { blockProductListMock, blocks, mockNlpAffirmationPatterns, + mockNlpFirstNamePatterns, mockNlpGreetingAnyNamePatterns, mockNlpGreetingNamePatterns, mockNlpGreetingPatterns, @@ -69,6 +70,7 @@ import { subscriberContextBlankInstance, } from '@/utils/test/mocks/conversation'; import { + mockNlpFirstNameEntities, mockNlpGreetingFullNameEntities, mockNlpGreetingNameEntities, } from '@/utils/test/mocks/nlp'; @@ -353,6 +355,25 @@ describe('BlockService', () => { ]); }); + it('should return match nlp patterns with synonyms match (canonical value)', () => { + const result = blockService.getMatchingNluPatterns( + mockNlpFirstNameEntities, + { + ...blockGetStarted, + patterns: [...blockGetStarted.patterns, mockNlpFirstNamePatterns], + }, + ); + expect(result).toEqual([ + [ + { + entity: 'firstname', + match: 'value', + value: 'jhon', + }, + ], + ]); + }); + it('should return empty array when it does not match nlp patterns', () => { const result = blockService.getMatchingNluPatterns( mockNlpGreetingFullNameEntities, diff --git a/api/src/chat/services/block.service.ts b/api/src/chat/services/block.service.ts index 0853d2b2..60fddde5 100644 --- a/api/src/chat/services/block.service.ts +++ b/api/src/chat/services/block.service.ts @@ -294,11 +294,11 @@ export class BlockService extends BaseService< * @returns The NLU patterns that matches the predicted entities */ getMatchingNluPatterns( - nlp: E, + { entities }: E, block: B, ): NlpPattern[][] { // No nlp entities to check against - if (nlp.entities.length === 0) { + if (entities.length === 0) { return []; } @@ -312,14 +312,17 @@ export class BlockService extends BaseService< } // Filter NLP patterns match based on best guessed entities - return nlpPatterns.filter((entities: NlpPattern[]) => { - return entities.every((ev: NlpPattern) => { + return nlpPatterns.filter((patterns: NlpPattern[]) => { + return patterns.every((ev: NlpPattern) => { if (ev.match === 'value') { - return nlp.entities.find((e) => { - return e.entity === ev.entity && e.value === ev.value; + return entities.find((e) => { + return ( + e.entity === ev.entity && + (e.value === ev.value || e.canonicalValue === ev.value) + ); }); } else if (ev.match === 'entity') { - return nlp.entities.find((e) => { + return entities.find((e) => { return e.entity === ev.entity; }); } else { @@ -429,12 +432,14 @@ export class BlockService extends BaseService< * - Returns `true` if all conditions are met, otherwise `false`. */ private matchesNluEntity( - { entity, value }: E, + { entity, value, canonicalValue }: E, pattern: NlpPattern, ): boolean { return ( entity === pattern.entity && - (pattern.match !== 'value' || value === pattern.value) + (pattern.match !== 'value' || + value === pattern.value || + canonicalValue === pattern.value) ); } diff --git a/api/src/helper/lib/__test__/base-nlp-helper.spec.ts b/api/src/helper/lib/__test__/base-nlp-helper.spec.ts index e2b958d9..7cee2ad6 100644 --- a/api/src/helper/lib/__test__/base-nlp-helper.spec.ts +++ b/api/src/helper/lib/__test__/base-nlp-helper.spec.ts @@ -265,14 +265,14 @@ describe('BaseNlpHelper', () => { describe('extractPatternBasedSlots', () => { it('should match using a valid regex pattern', () => { const entity: NlpEntityFull = { - name: 'number', + name: 'infos', values: [ { value: 'number', metadata: { pattern: '\\d+', wordBoundary: true }, }, ], - } as any; + } as NlpEntityFull; const result = helper.extractPatternBasedSlots( 'Order 123 and 456 now!', @@ -280,14 +280,16 @@ describe('BaseNlpHelper', () => { ); expect(result).toEqual([ { - entity: 'number', + entity: 'infos', + canonicalValue: 'number', value: '123', start: 6, end: 9, confidence: 1, }, { - entity: 'number', + entity: 'infos', + canonicalValue: 'number', value: '456', start: 14, end: 17, @@ -298,10 +300,10 @@ describe('BaseNlpHelper', () => { it('should respect metadata like toLowerCase and removeSpaces', () => { const entity: NlpEntityFull = { - name: 'code', + name: 'name', values: [ { - value: 'Code', + value: 'brand', metadata: { pattern: 'HEX BOT', toLowerCase: true, @@ -309,7 +311,7 @@ describe('BaseNlpHelper', () => { }, }, ], - } as any; + } as NlpEntityFull; const result = helper.extractPatternBasedSlots( 'My CODE is HEX BOT!', @@ -317,7 +319,8 @@ describe('BaseNlpHelper', () => { ); expect(result).toEqual([ { - entity: 'code', + entity: 'name', + canonicalValue: 'brand', value: 'hexbot', start: 11, end: 18, @@ -349,6 +352,7 @@ describe('BaseNlpHelper', () => { expect(result).toEqual([ { entity: 'keyword', + canonicalValue: 'word', value: '"ou"', start: 9, end: 13, diff --git a/api/src/helper/lib/base-nlp-helper.ts b/api/src/helper/lib/base-nlp-helper.ts index b1928c2f..3b94c3f6 100644 --- a/api/src/helper/lib/base-nlp-helper.ts +++ b/api/src/helper/lib/base-nlp-helper.ts @@ -288,9 +288,9 @@ export default abstract class BaseNlpHelper< } return (entity.values - .flatMap((patternValue) => { + .flatMap((nlpValue) => { const processedText = text; - const pattern = patternValue.metadata?.pattern; + const pattern = nlpValue.metadata?.pattern; if (!pattern) { this.logger.error('Missing NLP regex pattern'); @@ -299,7 +299,7 @@ export default abstract class BaseNlpHelper< let regex: RegExp; try { - const shouldWrap = patternValue.metadata?.wordBoundary; + const shouldWrap = nlpValue.metadata?.wordBoundary; regex = new RegExp(shouldWrap ? `\\b${pattern}\\b` : pattern, 'gi'); } catch { this.logger.error('Invalid NLP regex pattern'); @@ -312,21 +312,22 @@ export default abstract class BaseNlpHelper< let value = match[0]; // Apply preprocessing if needed - if (patternValue.metadata?.removeSpaces) { + if (nlpValue.metadata?.removeSpaces) { value = value.replace(/\s+/g, ''); } - if (patternValue.metadata?.toLowerCase) { + if (nlpValue.metadata?.toLowerCase) { value = value.toLowerCase(); } - if (patternValue.metadata?.stripDiacritics) { + if (nlpValue.metadata?.stripDiacritics) { value = value.normalize('NFD').replace(/[\u0300-\u036f]/g, ''); } return { entity: entity.name, value, + canonicalValue: nlpValue.value, start: match.index!, end: match.index! + match[0].length, confidence: 1, diff --git a/api/src/helper/types.ts b/api/src/helper/types.ts index 4e82e493..fd373f85 100644 --- a/api/src/helper/types.ts +++ b/api/src/helper/types.ts @@ -21,6 +21,9 @@ export namespace NLU { confidence: number; start?: number; end?: number; + // When lookup strategy is either 'keywords' or 'pattern', the canonical value + // is the actual NlpValue.value, given the match is either a synonym (expression) or a pattern match + canonicalValue?: string; } export interface ParseEntities { diff --git a/api/src/nlp/services/nlp.service.ts b/api/src/nlp/services/nlp.service.ts index 6a779648..1c2c1f34 100644 --- a/api/src/nlp/services/nlp.service.ts +++ b/api/src/nlp/services/nlp.service.ts @@ -51,6 +51,7 @@ export class NlpService { .filter(({ entity }) => nlpMap.has(entity)) .map((e) => { const entity = nlpMap.get(e.entity)!; + return { ...e, score: e.confidence * (entity.weight || 1), diff --git a/api/src/utils/test/mocks/block.ts b/api/src/utils/test/mocks/block.ts index c1c45611..32a26f70 100644 --- a/api/src/utils/test/mocks/block.ts +++ b/api/src/utils/test/mocks/block.ts @@ -294,6 +294,14 @@ export const mockNlpGreetingAnyNamePatterns: NlpPattern[] = [ }, ]; +export const mockNlpFirstNamePatterns: NlpPattern[] = [ + { + entity: 'firstname', + match: 'value', + value: 'jhon', + }, +]; + export const mockModifiedNlpBlock: BlockFull = { ...baseBlockInstance, name: 'Modified Mock Nlp', diff --git a/api/src/utils/test/mocks/nlp.ts b/api/src/utils/test/mocks/nlp.ts index f7f3b5ca..d7a99011 100644 --- a/api/src/utils/test/mocks/nlp.ts +++ b/api/src/utils/test/mocks/nlp.ts @@ -44,3 +44,14 @@ export const mockNlpGreetingFullNameEntities: NLU.ParseEntities = { }, ], }; + +export const mockNlpFirstNameEntities: NLU.ParseEntities = { + entities: [ + { + entity: 'firstname', + value: 'jhonny', + canonicalValue: 'jhon', + confidence: 0.75, + }, + ], +};