From 3ed21b40a7da815f93906dc72c0d05bd5c581121 Mon Sep 17 00:00:00 2001 From: MohamedAliBouhaouala Date: Mon, 5 May 2025 18:35:12 +0100 Subject: [PATCH] fix: decompose code into helper utils, add nlp entity dto validation, remove type casting --- api/docs/nlp/README.md | 14 +-- api/src/chat/services/block.service.spec.ts | 4 +- api/src/chat/services/block.service.ts | 122 +++++++++++++++----- api/src/nlp/dto/nlp-entity.dto.ts | 5 + api/src/nlp/schemas/types.ts | 11 +- api/src/nlp/services/nlp-entity.service.ts | 5 +- 6 files changed, 115 insertions(+), 46 deletions(-) diff --git a/api/docs/nlp/README.md b/api/docs/nlp/README.md index 89f1b46c..cc7daa2e 100644 --- a/api/docs/nlp/README.md +++ b/api/docs/nlp/README.md @@ -13,7 +13,7 @@ It enables more intelligent and context-aware block selection in conversational ### Standard Matching A user input contains entities that directly match a block’s patterns. -```bash +```ts Example: Input: intent = enquiry & subject = claim Block A: Patterns: intent: enquiry & subject: claim Block A will be selected. @@ -24,7 +24,7 @@ Block A will be selected. A block may match only some patterns but have high-confidence input on those matched ones, making it a better candidate than others with full matches but low-confidence entities. **Note: Confidence is multiplied by a pre-defined weight for each entity type.** -```bash +```ts Example: Input: intent = issue (confidence: 0.92) & subject = claim (confidence: 0.65) Block A: Pattern: intent: issue @@ -34,7 +34,7 @@ Block B: Pattern: subject: claim ### Multiple Blocks with Similar Patterns -```bash +```ts Input: intent = issue & subject = insurance Block A: intent = enquiry & subject = insurance Block B: subject = insurance @@ -45,7 +45,7 @@ Block B: subject = insurance If a block contains patterns that require entities not present in the user input, the block is excluded from scoring altogether. No penalties are applied — the block simply isn't considered a valid candidate. -```bash +```ts Input: intent = issue & subject = insurance Block A: intent = enquiry & subject = insurance & location = office Block B: subject = insurance & time = morning @@ -56,7 +56,7 @@ Block B: subject = insurance & time = morning When multiple blocks receive similar scores, penalty factors can help break the tie — especially in cases where patterns are less specific (e.g., using `Any` as a value). -```bash +```ts Input: intent = enquiry & subject = insurance Block A: intent = enquiry & subject = Any @@ -80,7 +80,7 @@ For each entity in the block's pattern: - If the entity `matches` an entity in the user input: - the score is increased by: `confidence × weight` - `Confidence` is a value between 0 and 1, returned by the NLU engine. - - `Weight` is a configured importance factor for that specific entity type. + - `Weight` (default value is `1`) is a configured importance factor for that specific entity type. - If the match is a wildcard (i.e., the block accepts any value): - A **penalty factor** is applied to slightly reduce its contribution: ``confidence × weight × penaltyFactor``. This encourages more specific matches when available. @@ -89,7 +89,7 @@ For each entity in the block's pattern: For each matched entity: -```bash +```ts score += confidence × weight × [optional penalty factor if wildcard] ``` diff --git a/api/src/chat/services/block.service.spec.ts b/api/src/chat/services/block.service.spec.ts index 41cef39a..73d0fd34 100644 --- a/api/src/chat/services/block.service.spec.ts +++ b/api/src/chat/services/block.service.spec.ts @@ -74,7 +74,7 @@ import { import { buildTestingMocks } from '@/utils/test/utils'; import { BlockRepository } from '../repositories/block.repository'; -import { Block, BlockModel } from '../schemas/block.schema'; +import { Block, BlockFull, BlockModel } from '../schemas/block.schema'; import { Category, CategoryModel } from '../schemas/category.schema'; import { LabelModel } from '../schemas/label.schema'; import { FileType } from '../schemas/types/attachment'; @@ -415,7 +415,7 @@ describe('BlockService', () => { jest .spyOn(nlpEntityService, 'getNlpMap') .mockResolvedValue(mockNlpCacheMap); - const blocks: Block[] = []; // Empty block array + const blocks: BlockFull[] = []; // Empty block array const matchedPatterns: NlpPattern[][] = []; const nlp = mockNlpEntitiesSetOne; diff --git a/api/src/chat/services/block.service.ts b/api/src/chat/services/block.service.ts index 08fd9a9a..7829aadc 100644 --- a/api/src/chat/services/block.service.ts +++ b/api/src/chat/services/block.service.ts @@ -16,7 +16,7 @@ import { CONSOLE_CHANNEL_NAME } from '@/extensions/channels/console/settings'; import { NLU } from '@/helper/types'; import { I18nService } from '@/i18n/services/i18n.service'; import { LanguageService } from '@/i18n/services/language.service'; -import { NlpCacheMap } from '@/nlp/schemas/types'; +import { NlpCacheMap, NlpCacheMapValues } from '@/nlp/schemas/types'; import { NlpEntityService } from '@/nlp/services/nlp-entity.service'; import { PluginService } from '@/plugins/plugins.service'; import { PluginType } from '@/plugins/types'; @@ -219,12 +219,12 @@ export class BlockService extends BaseService< // Proceed with matching the best NLP block if (matchesWithPatterns.length > 0) { - block = (await this.matchBestNLP( + block = await this.matchBestNLP( matchesWithPatterns.map((m) => m.block), matchesWithPatterns.map((p) => p.matchedPattern), nlp, nluPenaltyFactor, - )) as BlockFull | undefined; + ); } } } @@ -384,27 +384,18 @@ export class BlockService extends BaseService< * @returns The block with the highest NLP score, or undefined if no valid block is found. */ async matchBestNLP( - blocks: (Block | BlockFull)[] | undefined, + blocks: BlockFull[], matchedPatterns: NlpPattern[][], nlp: NLU.ParseEntities, nlpPenaltyFactor: number, - ): Promise { + ): Promise { if (!blocks || blocks.length === 0) return undefined; if (blocks.length === 1) return blocks[0]; - let bestBlock: Block | BlockFull | undefined; + let bestBlock: BlockFull | undefined; let highestScore = 0; - const entityNames: string[] = blocks.flatMap((block) => - block.patterns.flatMap((patternGroup) => { - if (Array.isArray(patternGroup)) { - return patternGroup.flatMap((pattern) => - isNlpPattern(pattern) ? [pattern.entity] : [], - ); - } - return []; // Skip non-array patternGroups - }), - ); - const uniqueEntityNames: string[] = [...new Set(entityNames)]; + const entityNames = this.extractNlpEntityNames(blocks); + const uniqueEntityNames = [...new Set(entityNames)]; const nlpCacheMap: NlpCacheMap = await this.entityService.getNlpMap(uniqueEntityNames); // Iterate through all blocks and calculate their NLP score @@ -412,7 +403,7 @@ export class BlockService extends BaseService< const block = blocks[i]; const patterns = matchedPatterns[i]; // If compatible, calculate the NLP score for this block - const nlpScore: number = this.calculateBlockScore( + const nlpScore = this.calculateBlockScore( patterns, nlp, nlpCacheMap, @@ -424,8 +415,13 @@ export class BlockService extends BaseService< } } - this.logger.debug(`Best NLP score obtained: ${highestScore}`); - this.logger.debug(`Best block selected: ${JSON.stringify(bestBlock)}`); + if (bestBlock) { + this.logger.debug(`Best NLP score obtained: ${highestScore}`); + this.logger.debug(`Best block selected:`, { + id: bestBlock.id, + name: bestBlock.name, + }); + } return bestBlock; } @@ -452,28 +448,96 @@ export class BlockService extends BaseService< nlpPenaltyFactor: number, ): number { // Compute individual pattern scores using the cache + if (!patterns.length) return 0; const patternScores: number[] = patterns.map((pattern) => { const entityData = nlpCacheMap.get(pattern.entity); if (!entityData) return 0; const matchedEntity: NLU.ParseEntity | undefined = nlp.entities.find( - (e) => - e.entity === pattern.entity && - entityData?.values.some((v) => v === e.value) && - (pattern.match !== 'value' || e.value === pattern.value), + (e) => this.matchesEntityData(e, pattern, entityData), ); - return matchedEntity?.confidence - ? matchedEntity.confidence * - entityData.weight * - (pattern.match === 'entity' ? nlpPenaltyFactor : 1) - : 0; + return this.computePatternScore( + matchedEntity, + pattern, + entityData, + nlpPenaltyFactor, + ); }); // Sum the scores return patternScores.reduce((sum, score) => sum + score, 0); } + /** + * Extracts the names of NLP entities from a given list of blocks. + * This method recursively goes through each block, pattern group, and pattern, + * filtering for valid NLP patterns and extracting the `entity` field. + * The resulting array contains the names of all the NLP entities found across all patterns. + * + * @param blocks - An array of `BlockFull` objects containing patterns. + * @returns An array of NLP entity names as strings. + */ + private extractNlpEntityNames(blocks: BlockFull[]): string[] { + return blocks.flatMap((block) => + block.patterns.flatMap((patternGroup) => { + if (Array.isArray(patternGroup)) { + return patternGroup.flatMap((pattern) => + isNlpPattern(pattern) ? [pattern.entity] : [], + ); + } + return []; // Skip non-array patternGroups + }), + ); + } + + /** + * Checks if a given `ParseEntity` from the NLP model matches the specified pattern + * and if its value exists within the values provided in the cache for the specified entity. + * + * @param e - The `ParseEntity` object from the NLP model, containing information about the entity and its value. + * @param pattern - The `NlpPattern` object representing the entity and value pattern to be matched. + * @param entityData - The `NlpCacheMapValues` object containing cached data, including entity values and weight, for the entity being matched. + * + * @returns A boolean indicating whether the `ParseEntity` matches the pattern and entity data from the cache. + * + * - The function compares the entity type between the `ParseEntity` and the `NlpPattern`. + * - If the pattern's match type is not `'value'`, it checks if the entity's value is present in the cache's `values` array. + * - If the pattern's match type is `'value'`, it further ensures that the entity's value matches the specified value in the pattern. + * - Returns `true` if all conditions are met, otherwise `false`. + */ + private matchesEntityData( + e: NLU.ParseEntity, + pattern: NlpPattern, + entityData: NlpCacheMapValues, + ): boolean { + return ( + e.entity === pattern.entity && + entityData?.values.some((v) => v === e.value) && + (pattern.match !== 'value' || e.value === pattern.value) + ); + } + + /** + * Computes the score for a given entity based on its confidence, weight, and penalty factor. + * + * @param entity - The `ParseEntity` to check, which may be `undefined` if no match is found. + * @param pattern - The `NlpPattern` object that specifies how to match the entity and its value. + * @param entityData - The cached data for the given entity, including `weight` and `values`. + * @param nlpPenaltyFactor - The penalty factor applied when the pattern's match type is 'entity'. + * @returns The computed score based on the entity's confidence, the cached weight, and the penalty factor. + */ + private computePatternScore( + entity: NLU.ParseEntity | undefined, + pattern: NlpPattern, + entityData: NlpCacheMapValues, + nlpPenaltyFactor: number, + ): number { + if (!entity || !entity.confidence) return 0; + const penalty = pattern.match === 'entity' ? nlpPenaltyFactor : 1; + return entity.confidence * entityData.weight * penalty; + } + /** * Matches an outcome-based block from a list of available blocks * based on the outcome of a system message. diff --git a/api/src/nlp/dto/nlp-entity.dto.ts b/api/src/nlp/dto/nlp-entity.dto.ts index 6efaa999..98c1002b 100644 --- a/api/src/nlp/dto/nlp-entity.dto.ts +++ b/api/src/nlp/dto/nlp-entity.dto.ts @@ -11,11 +11,13 @@ import { IsArray, IsBoolean, IsIn, + IsInt, IsNotEmpty, IsNumber, IsOptional, IsString, Matches, + Min, } from 'class-validator'; import { DtoConfig } from '@/utils/types/dto.types'; @@ -52,9 +54,12 @@ export class NlpEntityCreateDto { @ApiPropertyOptional({ description: 'Nlp entity associated weight for next block triggering', type: Number, + minimum: 1, }) @IsNumber() @IsOptional() + @Min(1, { message: 'Weight must be a positive integer' }) + @IsInt({ message: 'Weight must be an integer' }) weight?: number; } diff --git a/api/src/nlp/schemas/types.ts b/api/src/nlp/schemas/types.ts index 3d5634bf..96b7dae4 100644 --- a/api/src/nlp/schemas/types.ts +++ b/api/src/nlp/schemas/types.ts @@ -26,7 +26,10 @@ export enum NlpSampleState { inbox = 'inbox', } -export type NlpCacheMap = Map< - string, - { id: string; weight: number; values: string[] } ->; +export type NlpCacheMap = Map; + +export type NlpCacheMapValues = { + id: string; + weight: number; + values: string[]; +}; diff --git a/api/src/nlp/services/nlp-entity.service.ts b/api/src/nlp/services/nlp-entity.service.ts index 39932ef7..b0ffceb7 100644 --- a/api/src/nlp/services/nlp-entity.service.ts +++ b/api/src/nlp/services/nlp-entity.service.ts @@ -130,7 +130,7 @@ export class NlpEntityService extends BaseService< * Clears the NLP map cache */ async clearCache() { - this.cacheManager.del(NLP_MAP_CACHE_KEY); + await this.cacheManager.del(NLP_MAP_CACHE_KEY); } /** @@ -166,9 +166,6 @@ export class NlpEntityService extends BaseService< async getNlpMap(entityNames: string[]): Promise { const lookups = await this.findAndPopulate({ name: { $in: entityNames } }); const map: NlpCacheMap = new Map(); - if (!lookups.length) { - return map; // Return empty map if no entities found - } for (const lookup of lookups) { map.set(lookup.name, { id: lookup.id,