feat: priority scoring re-calculation & enabling weight modification in builtin nlp entities

This commit is contained in:
Mohamed Marrouchi
2025-04-07 20:20:19 +01:00
committed by MohamedAliBouhaouala
parent 5f08076e8b
commit 896f01c5af
9 changed files with 390 additions and 129 deletions

View File

@@ -8,6 +8,8 @@
import { z } from 'zod';
import { BlockFull } from '../block.schema';
import { PayloadType } from './button';
export const payloadPatternSchema = z.object({
@@ -57,3 +59,8 @@ export const patternSchema = z.union([
]);
export type Pattern = z.infer<typeof patternSchema>;
/**
 * Pairs a candidate block with the NLP patterns that matched for it.
 */
export type MatchResult = {
// The block that produced at least one NLP pattern match
block: BlockFull;
// The NLP patterns of that block which matched the parsed NLP entities
matchedPattern: NlpPattern[];
};

View File

@@ -52,13 +52,17 @@ import {
blockProductListMock,
blocks,
mockNlpBlock,
nlpBlocks,
mockNlpPatternsSetOne,
mockNlpPatternsSetTwo,
} from '@/utils/test/mocks/block';
import {
contextBlankInstance,
subscriberContextBlankInstance,
} from '@/utils/test/mocks/conversation';
import { nlpEntitiesGreeting } from '@/utils/test/mocks/nlp';
import {
mockNlpEntitiesSetOne,
nlpEntitiesGreeting,
} from '@/utils/test/mocks/nlp';
import {
closeInMongodConnection,
rootMongooseTestModule,
@@ -71,6 +75,7 @@ import { Category, CategoryModel } from '../schemas/category.schema';
import { LabelModel } from '../schemas/label.schema';
import { FileType } from '../schemas/types/attachment';
import { StdOutgoingListMessage } from '../schemas/types/message';
import { NlpPattern } from '../schemas/types/pattern';
import { CategoryRepository } from './../repositories/category.repository';
import { BlockService } from './block.service';
@@ -96,6 +101,18 @@ const mockNlpEntityService = {
return Promise.resolve(null); // Default response if the entity isn't found
}),
};
// Stand-in for NlpValueService: `find` resolves a canned list of values
// selected by the entity id carried in the query.
const mockNlpValueService = {
  find: jest.fn().mockImplementation((query) => {
    switch (query.entity) {
      case '67e3e41eff551ca5be70559c':
        // Values simulated for the 'intent' entity
        return Promise.resolve([
          { value: 'greeting' },
          { value: 'affirmation' },
        ]);
      case '67e3e41eff551ca5be70559d':
        // Values simulated for the 'firstname' entity
        return Promise.resolve([{ value: 'jhon' }, { value: 'doe' }]);
      default:
        // Unknown entity id: nothing to return
        return Promise.resolve([]);
    }
  }),
};
describe('BlockService', () => {
let blockRepository: BlockRepository;
let categoryRepository: CategoryRepository;
@@ -105,7 +122,6 @@ describe('BlockService', () => {
let hasPreviousBlocks: Block;
let contentService: ContentService;
let contentTypeService: ContentTypeService;
let nlpEntityService: NlpEntityService;
beforeAll(async () => {
const { getMocks } = await buildTestingMocks({
@@ -140,15 +156,17 @@ describe('BlockService', () => {
ContentService,
AttachmentService,
LanguageService,
NlpEntityService,
NlpEntityRepository,
NlpValueService,
NlpValueRepository,
NlpSampleEntityRepository,
{
provide: NlpEntityService, // Mocking NlpEntityService
useValue: mockNlpEntityService,
},
{
provide: NlpValueService, // Mocking NlpValueService
useValue: mockNlpValueService,
},
{
provide: PluginService,
useValue: {},
@@ -188,14 +206,12 @@ describe('BlockService', () => {
contentTypeService,
categoryRepository,
blockRepository,
nlpEntityService,
] = await getMocks([
BlockService,
ContentService,
ContentTypeService,
CategoryRepository,
BlockRepository,
NlpEntityService,
]);
category = (await categoryRepository.findOne({ label: 'default' }))!;
hasPreviousBlocks = (await blockRepository.findOne({
@@ -365,56 +381,154 @@ describe('BlockService', () => {
});
});
// describe('matchBestNLP', () => {
// it('should return undefined if blocks is empty', async () => {
// const result = await blockService.matchBestNLP([]);
// expect(result).toBeUndefined();
// });
// it('should return the only block if there is one', async () => {
// const result = await blockService.matchBestNLP([blockEmpty]);
// expect(result).toBe(blockEmpty);
// });
// it('should correctly select the best block based on NLP scores', async () => {
// const result = await blockService.matchBestNLP(nlpBlocks);
// expect(result).toBe(mockNlpBlock);
// // Iterate over each block
// for (const block of nlpBlocks) {
// // Flatten the patterns array and filter valid NLP patterns
// block.patterns
// .flatMap((pattern) => (Array.isArray(pattern) ? pattern : []))
// .filter((p) => typeof p === 'object' && 'entity' in p && 'match' in p) // Filter only valid patterns with entity and match
// .forEach((p) => {
// // Check if findOne was called with the correct entity
// expect(nlpEntityService.findOne).toHaveBeenCalledWith(
// { name: p.entity },
// undefined,
// { _id: 0, lookups: 1, weight: 1 },
// );
// });
// }
// });
// it('should return the block with the highest combined score', async () => {
// const result = await blockService.matchBestNLP(nlpBlocks);
// expect(result).toBe(mockNlpBlock);
// // Iterate over each block
// for (const block of nlpBlocks) {
// // Flatten the patterns array and filter valid NLP patterns
// block.patterns
// .flatMap((pattern) => (Array.isArray(pattern) ? pattern : []))
// .filter((p) => typeof p === 'object' && 'entity' in p && 'match' in p) // Filter only valid patterns with entity and match
// .forEach((p) => {
// // Check if findOne was called with the correct entity
// expect(nlpEntityService.findOne).toHaveBeenCalledWith(
// { name: p.entity },
// undefined,
// { _id: 0, lookups: 1, weight: 1 },
// );
// });
// }
// });
// });
describe('matchBestNLP', () => {
it('should return undefined if blocks is empty', async () => {
const result = await blockService.matchBestNLP([]);
expect(result).toBeUndefined();
it('should return the block with the highest NLP score', async () => {
  // The expected winner is placed LAST and given the higher score, so the
  // test fails if the implementation merely returns the first block or
  // ignores the computed scores.
  const blocks = [blockGetStarted, mockNlpBlock];
  const matchedPatterns = [mockNlpPatternsSetTwo, mockNlpPatternsSetOne];
  const nlp = mockNlpEntitiesSetOne;

  // Give each block a distinct score, in call order.
  const calculateBlockScoreSpy = jest
    .spyOn(blockService, 'calculateBlockScore')
    .mockResolvedValueOnce(0.5)
    .mockResolvedValueOnce(1.499);

  try {
    const bestBlock = await blockService.matchBestNLP(
      blocks,
      matchedPatterns,
      nlp,
    );

    // One scoring call per candidate block
    expect(calculateBlockScoreSpy).toHaveBeenCalledTimes(2);
    // Each block must be scored with the patterns at its own index
    expect(calculateBlockScoreSpy).toHaveBeenNthCalledWith(
      2,
      mockNlpPatternsSetOne,
      nlp,
      expect.any(Map),
    );
    // The block with the highest score is selected
    expect(bestBlock).toEqual(mockNlpBlock);
  } finally {
    // Always restore the real implementation, even when an assertion fails,
    // so later tests are not affected by the spy.
    calculateBlockScoreSpy.mockRestore();
  }
});
it('should return the only block if there is one', async () => {
const result = await blockService.matchBestNLP([blockEmpty]);
expect(result).toBe(blockEmpty);
it('should return undefined if no blocks match or the list is empty', async () => {
  // No candidates and, consequently, no matched patterns
  const noBlocks: Block[] = [];
  const noPatterns: NlpPattern[][] = [];

  const bestBlock = await blockService.matchBestNLP(
    noBlocks,
    noPatterns,
    mockNlpEntitiesSetOne,
  );

  // With nothing to score, there is no best block
  expect(bestBlock).toBeUndefined();
});
});
describe('calculateBlockScore', () => {
it('should calculate the correct NLP score for a block', async () => {
  const entityCache = new Map<
    string,
    { id: string; weight: number; values: string[] }
  >();

  const score = await blockService.calculateBlockScore(
    mockNlpPatternsSetOne,
    mockNlpEntitiesSetOne,
    entityCache,
  );

  // The score is a sum of confidence * weight products, i.e. floating-point
  // arithmetic: compare with a tolerance instead of exact equality.
  expect(score).toBeCloseTo(1.499, 6);
});
it('should correctly select the best block based on NLP scores', async () => {
const result = await blockService.matchBestNLP(nlpBlocks);
expect(result).toBe(mockNlpBlock);
it('should return 0 if no matching entities are found', async () => {
const entityCache = new Map<
string,
{ id: string; weight: number; values: string[] }
>();
const score = await blockService.calculateBlockScore(
mockNlpPatternsSetTwo,
mockNlpEntitiesSetOne,
entityCache,
);
// Iterate over each block
for (const block of nlpBlocks) {
// Flatten the patterns array and filter valid NLP patterns
block.patterns
.flatMap((pattern) => (Array.isArray(pattern) ? pattern : []))
.filter((p) => typeof p === 'object' && 'entity' in p && 'match' in p) // Filter only valid patterns with entity and match
.forEach((p) => {
// Check if findOne was called with the correct entity
expect(nlpEntityService.findOne).toHaveBeenCalledWith(
{ name: p.entity },
undefined,
{ _id: 0, lookups: 1, weight: 1 },
);
});
}
expect(score).toBe(0); // No matching entity, so score should be 0
});
it('should correctly use entity cache to avoid redundant database calls', async () => {
const entityCache = new Map<
string,
{ id: string; weight: number; values: string[] }
>();
it('should return the block with the highest combined score', async () => {
const result = await blockService.matchBestNLP(nlpBlocks);
expect(result).toBe(mockNlpBlock);
// Iterate over each block
for (const block of nlpBlocks) {
// Flatten the patterns array and filter valid NLP patterns
block.patterns
.flatMap((pattern) => (Array.isArray(pattern) ? pattern : []))
.filter((p) => typeof p === 'object' && 'entity' in p && 'match' in p) // Filter only valid patterns with entity and match
.forEach((p) => {
// Check if findOne was called with the correct entity
expect(nlpEntityService.findOne).toHaveBeenCalledWith(
{ name: p.entity },
undefined,
{ _id: 0, lookups: 1, weight: 1 },
);
});
}
// First call should calculate and cache entity data
await blockService.calculateBlockScore(
mockNlpPatternsSetOne,
mockNlpEntitiesSetOne,
entityCache,
);
const cacheSizeBefore = entityCache.size;
// Second call should use cached entity data, without redundant DB calls
await blockService.calculateBlockScore(
mockNlpPatternsSetOne,
mockNlpEntitiesSetOne,
entityCache,
);
const cacheSizeAfter = entityCache.size;
// Assert that the cache size hasn't increased after the second call
expect(cacheSizeBefore).toBe(cacheSizeAfter);
});
});

View File

@@ -17,6 +17,7 @@ import { NLU } from '@/helper/types';
import { I18nService } from '@/i18n/services/i18n.service';
import { LanguageService } from '@/i18n/services/language.service';
import { NlpEntityService } from '@/nlp/services/nlp-entity.service';
import { NlpValueService } from '@/nlp/services/nlp-value.service';
import { PluginService } from '@/plugins/plugins.service';
import { PluginType } from '@/plugins/types';
import { SettingService } from '@/setting/services/setting.service';
@@ -36,7 +37,11 @@ import {
StdOutgoingEnvelope,
StdOutgoingSystemEnvelope,
} from '../schemas/types/message';
import { NlpPattern, PayloadPattern } from '../schemas/types/pattern';
import {
MatchResult,
NlpPattern,
PayloadPattern,
} from '../schemas/types/pattern';
import { Payload, StdQuickReply } from '../schemas/types/quick-reply';
import { SubscriberContext } from '../schemas/types/subscriberContext';
@@ -55,6 +60,7 @@ export class BlockService extends BaseService<
protected readonly i18n: I18nService,
protected readonly languageService: LanguageService,
protected readonly entityService: NlpEntityService,
protected readonly valueService: NlpValueService,
) {
super(repository);
}
@@ -183,12 +189,41 @@ export class BlockService extends BaseService<
.shift();
// Perform an NLP Match
if (!block && nlp) {
// Find block pattern having the best match of nlp entities
const newBlocks = filteredBlocks.filter((b) => {
return this.matchNLP(nlp, b);
});
block = (await this.matchBestNLP(newBlocks)) as BlockFull | undefined;
// Use the `reduce` function to iterate over `filteredBlocks` and accumulate a new array `matchesWithPatterns`.
// This approach combines the matching of NLP patterns and filtering of blocks with empty or invalid matches
// into a single operation. This avoids the need for a separate mapping and filtering step, improving performance.
// For each block in `filteredBlocks`, we call `matchNLP` to find patterns that match the NLP data.
// If `matchNLP` returns a non-empty list of matched patterns, the block and its matched patterns are added
// to the accumulator array `acc`, which is returned as the final result.
// This ensures that only blocks with valid matches are kept, and blocks with no matches are excluded,
// all while iterating through the list only once.
const matchesWithPatterns = filteredBlocks.reduce<MatchResult[]>(
(acc, b) => {
const matchedPattern = this.matchNLP(nlp, b);
if (matchedPattern && matchedPattern.length > 0) {
acc.push({ block: b, matchedPattern });
}
return acc;
},
[],
);
// Log the matched patterns
this.logger.debug(
`Matched patterns: ${JSON.stringify(matchesWithPatterns.map((p) => p.matchedPattern))}`,
);
// Proceed with matching the best NLP block
if (matchesWithPatterns.length > 0) {
block = (await this.matchBestNLP(
matchesWithPatterns.map((m) => m.block),
matchesWithPatterns.map((p) => p.matchedPattern),
nlp,
)) as BlockFull | undefined;
}
}
}
@@ -336,70 +371,122 @@ export class BlockService extends BaseService<
}
/**
* Identifies and returns the best-matching block based on NLP entity scores.
* Matches the best block based on NLP pattern scoring.
* The function calculates the NLP score for each block based on the matched patterns and selected entity weights,
* and returns the block with the highest score.
*
* This function evaluates a list of blocks by analyzing their associated NLP entities
* and scoring them based on predefined lookup entities. The block with the highest
* score is selected as the best match.
* @param blocks - Blocks on which to perform the filtering
*
* @returns The best block
* @param blocks - Array of blocks to match with patterns
* @param matchedPatterns - Array of matched NLP patterns corresponding to each block
* @param nlp - The NLP parsed entities to compare against
* @returns The block with the highest NLP score, or undefined if no valid block is found
*/
async matchBestNLP(
blocks: Block[] | BlockFull[] | undefined,
blocks: (Block | BlockFull)[] | undefined,
matchedPatterns: NlpPattern[][],
nlp: NLU.ParseEntities,
): Promise<Block | BlockFull | undefined> {
// No blocks to check against
if (blocks?.length === 0 || !blocks) {
return undefined;
}
if (!blocks || blocks.length === 0) return undefined;
if (blocks.length === 1) return blocks[0];
// If there's only one block, return it immediately.
if (blocks.length === 1) {
return blocks[0];
}
let bestBlock: Block | BlockFull | undefined;
let highestScore = 0;
// Iterate over each block in blocks
for (const block of blocks) {
let nlpScore = 0;
const entityCache = new Map<
string,
{ id: string; weight: number; values: string[] }
>();
// Gather all entity lookups for patterns that include an entity
const entityLookups = await Promise.all(
block.patterns
.flatMap((pattern) => (Array.isArray(pattern) ? pattern : []))
.filter((p) => typeof p === 'object' && 'entity' in p && 'match' in p)
.map(async (pattern) => {
const entityName = pattern.entity;
return await this.entityService.findOne(
{ name: entityName },
undefined,
{ lookups: 1, weight: 1, _id: 0 },
);
}),
// Iterate through all blocks and calculate their NLP score
for (let i = 0; i < blocks.length; i++) {
const block = blocks[i];
const patterns = matchedPatterns[i];
// If compatible, calculate the NLP score for this block
const nlpScore = await this.calculateBlockScore(
patterns,
nlp,
entityCache,
);
nlpScore += entityLookups.reduce((score, entityLookup) => {
if (entityLookup && entityLookup.lookups[0] && entityLookup.weight) {
return score + entityLookup.weight; // Add points based on the Nlp entity associated weight
}
return score; // Return the current score if no match
}, 0);
// Update the best block if the current block has a higher NLP score
if (nlpScore > highestScore) {
highestScore = nlpScore;
bestBlock = block;
}
}
this.logger.debug(`Best Nlp Score obtained ${highestScore}`);
this.logger.debug(
`Best retrieved block based on NLP entities ${JSON.stringify(bestBlock)}`,
);
this.logger.debug(`Best NLP score obtained: ${highestScore}`);
this.logger.debug(`Best block selected: ${JSON.stringify(bestBlock)}`);
return bestBlock;
}
/**
 * Calculates the NLP score of a single block from its matched patterns and
 * the parsed NLP entities.
 *
 * For every pattern, the first parsed entity that has the same entity name,
 * whose value is one of the values stored for that entity and (for
 * `match: 'value'` patterns) whose value equals the pattern value, contributes
 * `confidence * weight` to the score. Patterns without such an entity
 * contribute 0.
 *
 * Entity data (id, weight, known values) is read from `entityCache` when
 * present. Each distinct entity missing from the cache is fetched exactly
 * once per call, even when several patterns reference it, and is then stored
 * in the cache. Entities that cannot be found, or that have no id or a falsy
 * weight, are not cached and contribute 0.
 *
 * @param patterns - The NLP patterns matched for the block
 * @param nlp - The parsed NLP entities
 * @param entityCache - Cache of previously fetched entity data; mutated in
 * place with any entity resolved during this call
 * @returns The calculated NLP score for the block
 */
async calculateBlockScore(
  patterns: NlpPattern[],
  nlp: NLU.ParseEntities,
  entityCache: Map<string, { id: string; weight: number; values: string[] }>,
): Promise<number> {
  // Deduplicate BEFORE going async: checking the cache inside concurrent
  // callbacks would let two patterns on the same entity both miss the cache
  // and both query the database.
  const missingEntityNames = Array.from(
    new Set(patterns.map((pattern) => pattern.entity)),
  ).filter((entityName) => !entityCache.has(entityName));

  await Promise.all(
    missingEntityNames.map(async (entityName) => {
      const entityLookup = await this.entityService.findOne(
        { name: entityName },
        undefined,
        { lookups: 1, weight: 1, _id: 1 },
      );
      // Unknown entity or entity without a usable weight: leave it uncached
      // so that its patterns score 0.
      if (!entityLookup?.id || !entityLookup.weight) return;

      const valueLookups = await this.valueService.find(
        { entity: entityLookup.id },
        undefined,
        { value: 1, _id: 0 },
      );

      entityCache.set(entityName, {
        id: entityLookup.id,
        weight: entityLookup.weight,
        values: valueLookups.map((v) => v.value),
      });
    }),
  );

  // All required data is now cached: score the patterns synchronously, in
  // pattern order.
  return patterns.reduce((score, pattern) => {
    const entityData = entityCache.get(pattern.entity);
    if (!entityData) return score;

    const matchedEntity = nlp.entities.find(
      (e) =>
        e.entity === pattern.entity &&
        entityData.values.includes(e.value) &&
        (pattern.match !== 'value' || e.value === pattern.value),
    );

    // A missing or zero confidence contributes nothing
    return matchedEntity?.confidence
      ? score + matchedEntity.confidence * entityData.weight
      : score;
  }, 0);
}
/**
* Matches an outcome-based block from a list of available blocks
* based on the outcome of a system message.

View File

@@ -262,16 +262,19 @@ describe('NlpEntityController', () => {
).rejects.toThrow(NotFoundException);
});
it('should throw exception when nlp entity is builtin', async () => {
it('should update the NLP entity even if it is builtin', async () => {
const updateNlpEntity: NlpEntityCreateDto = {
name: 'updated',
name: 'intent',
doc: '',
lookups: ['trait'],
builtin: false,
builtin: true,
weight: 2,
};
await expect(
nlpEntityController.updateOne(buitInEntityId!, updateNlpEntity),
).rejects.toThrow(MethodNotAllowedException);
const result = await nlpEntityController.updateOne(
buitInEntityId!,
updateNlpEntity,
);
expect(result).toEqual(expect.objectContaining(updateNlpEntity));
});
});
describe('deleteMany', () => {

View File

@@ -157,11 +157,6 @@ export class NlpEntityController extends BaseController<
this.logger.warn(`Unable to update NLP Entity by id ${id}`);
throw new NotFoundException(`NLP Entity with ID ${id} not found`);
}
if (nlpEntity.builtin) {
throw new MethodNotAllowedException(
`Cannot update builtin NLP Entity ${nlpEntity.name}`,
);
}
return await this.nlpEntityService.updateOne(id, updateNlpEntityDto);
}

View File

@@ -16,7 +16,7 @@ import { ButtonType, PayloadType } from '@/chat/schemas/types/button';
import { CaptureVar } from '@/chat/schemas/types/capture-var';
import { OutgoingMessageFormat } from '@/chat/schemas/types/message';
import { BlockOptions, ContentOptions } from '@/chat/schemas/types/options';
import { Pattern } from '@/chat/schemas/types/pattern';
import { NlpPattern, Pattern } from '@/chat/schemas/types/pattern';
import { QuickReplyType } from '@/chat/schemas/types/quick-reply';
import { modelInstance } from './misc';
@@ -246,6 +246,32 @@ export const blockGetStarted = {
message: ['Welcome! How are you ? '],
} as unknown as BlockFull;
/**
 * Two value-match NLP patterns: intent = 'greeting' and firstname = 'jhon'.
 */
export const mockNlpPatternsSetOne: NlpPattern[] = [
{
entity: 'intent',
match: 'value',
value: 'greeting',
},
{
entity: 'firstname',
match: 'value',
value: 'jhon',
},
];
/**
 * Two value-match NLP patterns: intent = 'affirmation' and firstname = 'mark'.
 * Same entities as mockNlpPatternsSetOne, but with different expected values.
 */
export const mockNlpPatternsSetTwo: NlpPattern[] = [
{
entity: 'intent',
match: 'value',
value: 'affirmation',
},
{
entity: 'firstname',
match: 'value',
value: 'mark',
},
];
export const mockNlpBlock = {
...baseBlockInstance,
name: 'Mock Nlp',
@@ -254,21 +280,19 @@ export const mockNlpBlock = {
'/we*lcome/',
{ label: 'Mock Nlp', value: 'MOCK_NLP' },
[
{
entity: 'intent',
match: 'value',
value: 'greeting',
},
{
entity: 'intent',
match: 'value',
value: 'want',
},
{
entity: 'intent',
match: 'value',
value: 'affirmative',
},
...mockNlpPatternsSetOne,
[
{
entity: 'intent',
match: 'value',
value: 'greeting',
},
{
entity: 'firstname',
match: 'value',
value: 'doe',
},
],
],
],
trigger_labels: customerLabelsMock,

View File

@@ -1,5 +1,5 @@
/*
* Copyright © 2024 Hexastack. All rights reserved.
* Copyright © 2025 Hexastack. All rights reserved.
*
* Licensed under the GNU Affero General Public License v3.0 (AGPLv3) with the following additional terms:
* 1. The name "Hexabot" is a trademark of Hexastack. You may not use this name in derivative works without express written permission.
@@ -27,3 +27,33 @@ export const nlpEntitiesGreeting: NLU.ParseEntities = {
},
],
};
/**
 * Parsed NLU result with two entities: intent = 'greeting' (confidence 0.999)
 * and firstname = 'jhon' (confidence 0.5).
 */
export const mockNlpEntitiesSetOne: NLU.ParseEntities = {
entities: [
{
entity: 'intent',
value: 'greeting',
confidence: 0.999,
},
{
entity: 'firstname',
value: 'jhon',
confidence: 0.5,
},
],
};
/**
 * Parsed NLU result with two entities: intent = 'greeting' (confidence 0.94)
 * and firstname = 'doe' (confidence 0.33).
 */
export const mockNlpEntitiesSetTwo: NLU.ParseEntities = {
entities: [
{
entity: 'intent',
value: 'greeting',
confidence: 0.94,
},
{
entity: 'firstname',
value: 'doe',
confidence: 0.33,
},
],
};

View File

@@ -156,8 +156,7 @@ function StackComponent<T extends GridValidRowModel>({
disabled={
(isDisabled && isDisabled(params.row)) ||
(params.row.builtin &&
(requires.includes(PermissionAction.UPDATE) ||
requires.includes(PermissionAction.DELETE)))
requires.includes(PermissionAction.DELETE))
}
onClick={() => {
action && action(params.row);

View File

@@ -123,6 +123,7 @@ export const NlpEntityVarForm: FC<ComponentFormProps<INlpEntity>> = ({
required
autoFocus
helperText={errors.name ? errors.name.message : null}
disabled={data?.builtin}
/>
</ContentItem>
<ContentItem>
@@ -130,6 +131,7 @@ export const NlpEntityVarForm: FC<ComponentFormProps<INlpEntity>> = ({
label={t("label.doc")}
{...register("doc")}
multiline={true}
disabled={data?.builtin}
/>
</ContentItem>
<ContentItem>