fix: decompose code into helper utils, add nlp entity dto validation, remove type casting

This commit is contained in:
MohamedAliBouhaouala 2025-05-05 18:35:12 +01:00
parent a8666ceb57
commit 3ed21b40a7
6 changed files with 115 additions and 46 deletions

View File

@ -13,7 +13,7 @@ It enables more intelligent and context-aware block selection in conversational
### Standard Matching
A user input contains entities that directly match a block's patterns.
```bash
```ts
Example: Input: intent = enquiry & subject = claim
Block A: Patterns: intent: enquiry & subject: claim
Block A will be selected.
@ -24,7 +24,7 @@ Block A will be selected.
A block may match only some patterns but have high-confidence input on those matched ones, making it a better candidate than others with full matches but low-confidence entities.
**Note: Confidence is multiplied by a pre-defined weight for each entity type.**
```bash
```ts
Example:
Input: intent = issue (confidence: 0.92) & subject = claim (confidence: 0.65)
Block A: Pattern: intent: issue
@ -34,7 +34,7 @@ Block B: Pattern: subject: claim
### Multiple Blocks with Similar Patterns
```bash
```ts
Input: intent = issue & subject = insurance
Block A: intent = enquiry & subject = insurance
Block B: subject = insurance
@ -45,7 +45,7 @@ Block B: subject = insurance
If a block contains patterns that require entities not present in the user input, the block is excluded from scoring altogether. No penalties are applied — the block simply isn't considered a valid candidate.
```bash
```ts
Input: intent = issue & subject = insurance
Block A: intent = enquiry & subject = insurance & location = office
Block B: subject = insurance & time = morning
@ -56,7 +56,7 @@ Block B: subject = insurance & time = morning
When multiple blocks receive similar scores, penalty factors can help break the tie — especially in cases where patterns are less specific (e.g., using `Any` as a value).
```bash
```ts
Input: intent = enquiry & subject = insurance
Block A: intent = enquiry & subject = Any
@ -80,7 +80,7 @@ For each entity in the block's pattern:
- If the entity `matches` an entity in the user input:
- the score is increased by: `confidence × weight`
- `Confidence` is a value between 0 and 1, returned by the NLU engine.
- `Weight` is a configured importance factor for that specific entity type.
- `Weight` (default value is `1`) is a configured importance factor for that specific entity type.
- If the match is a wildcard (i.e., the block accepts any value):
- A **penalty factor** is applied to slightly reduce its contribution:
``confidence × weight × penaltyFactor``. This encourages more specific matches when available.
@ -89,7 +89,7 @@ For each entity in the block's pattern:
For each matched entity:
```bash
```ts
score += confidence × weight × [optional penalty factor if wildcard]
```

View File

@ -74,7 +74,7 @@ import {
import { buildTestingMocks } from '@/utils/test/utils';
import { BlockRepository } from '../repositories/block.repository';
import { Block, BlockModel } from '../schemas/block.schema';
import { Block, BlockFull, BlockModel } from '../schemas/block.schema';
import { Category, CategoryModel } from '../schemas/category.schema';
import { LabelModel } from '../schemas/label.schema';
import { FileType } from '../schemas/types/attachment';
@ -415,7 +415,7 @@ describe('BlockService', () => {
jest
.spyOn(nlpEntityService, 'getNlpMap')
.mockResolvedValue(mockNlpCacheMap);
const blocks: Block[] = []; // Empty block array
const blocks: BlockFull[] = []; // Empty block array
const matchedPatterns: NlpPattern[][] = [];
const nlp = mockNlpEntitiesSetOne;

View File

@ -16,7 +16,7 @@ import { CONSOLE_CHANNEL_NAME } from '@/extensions/channels/console/settings';
import { NLU } from '@/helper/types';
import { I18nService } from '@/i18n/services/i18n.service';
import { LanguageService } from '@/i18n/services/language.service';
import { NlpCacheMap } from '@/nlp/schemas/types';
import { NlpCacheMap, NlpCacheMapValues } from '@/nlp/schemas/types';
import { NlpEntityService } from '@/nlp/services/nlp-entity.service';
import { PluginService } from '@/plugins/plugins.service';
import { PluginType } from '@/plugins/types';
@ -219,12 +219,12 @@ export class BlockService extends BaseService<
// Proceed with matching the best NLP block
if (matchesWithPatterns.length > 0) {
block = (await this.matchBestNLP(
block = await this.matchBestNLP(
matchesWithPatterns.map((m) => m.block),
matchesWithPatterns.map((p) => p.matchedPattern),
nlp,
nluPenaltyFactor,
)) as BlockFull | undefined;
);
}
}
}
@ -384,27 +384,18 @@ export class BlockService extends BaseService<
* @returns The block with the highest NLP score, or undefined if no valid block is found.
*/
async matchBestNLP(
blocks: (Block | BlockFull)[] | undefined,
blocks: BlockFull[],
matchedPatterns: NlpPattern[][],
nlp: NLU.ParseEntities,
nlpPenaltyFactor: number,
): Promise<Block | BlockFull | undefined> {
): Promise<BlockFull | undefined> {
if (!blocks || blocks.length === 0) return undefined;
if (blocks.length === 1) return blocks[0];
let bestBlock: Block | BlockFull | undefined;
let bestBlock: BlockFull | undefined;
let highestScore = 0;
const entityNames: string[] = blocks.flatMap((block) =>
block.patterns.flatMap((patternGroup) => {
if (Array.isArray(patternGroup)) {
return patternGroup.flatMap((pattern) =>
isNlpPattern(pattern) ? [pattern.entity] : [],
);
}
return []; // Skip non-array patternGroups
}),
);
const uniqueEntityNames: string[] = [...new Set(entityNames)];
const entityNames = this.extractNlpEntityNames(blocks);
const uniqueEntityNames = [...new Set(entityNames)];
const nlpCacheMap: NlpCacheMap =
await this.entityService.getNlpMap(uniqueEntityNames);
// Iterate through all blocks and calculate their NLP score
@ -412,7 +403,7 @@ export class BlockService extends BaseService<
const block = blocks[i];
const patterns = matchedPatterns[i];
// If compatible, calculate the NLP score for this block
const nlpScore: number = this.calculateBlockScore(
const nlpScore = this.calculateBlockScore(
patterns,
nlp,
nlpCacheMap,
@ -424,8 +415,13 @@ export class BlockService extends BaseService<
}
}
this.logger.debug(`Best NLP score obtained: ${highestScore}`);
this.logger.debug(`Best block selected: ${JSON.stringify(bestBlock)}`);
if (bestBlock) {
this.logger.debug(`Best NLP score obtained: ${highestScore}`);
this.logger.debug(`Best block selected:`, {
id: bestBlock.id,
name: bestBlock.name,
});
}
return bestBlock;
}
@ -452,28 +448,96 @@ export class BlockService extends BaseService<
nlpPenaltyFactor: number,
): number {
// Compute individual pattern scores using the cache
if (!patterns.length) return 0;
const patternScores: number[] = patterns.map((pattern) => {
const entityData = nlpCacheMap.get(pattern.entity);
if (!entityData) return 0;
const matchedEntity: NLU.ParseEntity | undefined = nlp.entities.find(
(e) =>
e.entity === pattern.entity &&
entityData?.values.some((v) => v === e.value) &&
(pattern.match !== 'value' || e.value === pattern.value),
(e) => this.matchesEntityData(e, pattern, entityData),
);
return matchedEntity?.confidence
? matchedEntity.confidence *
entityData.weight *
(pattern.match === 'entity' ? nlpPenaltyFactor : 1)
: 0;
return this.computePatternScore(
matchedEntity,
pattern,
entityData,
nlpPenaltyFactor,
);
});
// Sum the scores
return patternScores.reduce((sum, score) => sum + score, 0);
}
/**
 * Collects the entity name of every NLP pattern declared on the given blocks.
 *
 * Each block's `patterns` list is walked in order. Only pattern groups that
 * are arrays are inspected; inside such a group, every entry accepted by
 * `isNlpPattern` contributes its `entity` field. Names are returned in
 * encounter order and are NOT de-duplicated.
 *
 * @param blocks - The blocks whose `patterns` are scanned.
 * @returns The entity names found, possibly containing duplicates.
 */
private extractNlpEntityNames(blocks: BlockFull[]): string[] {
  const names: string[] = [];
  for (const { patterns } of blocks) {
    for (const group of patterns) {
      // Non-array pattern groups carry no NLP patterns: skip them.
      if (!Array.isArray(group)) continue;
      for (const candidate of group) {
        if (isNlpPattern(candidate)) {
          names.push(candidate.entity);
        }
      }
    }
  }
  return names;
}
/**
 * Tells whether an entity parsed from the user input satisfies a block's NLP pattern.
 *
 * All of the following must hold for a match:
 * - the parsed entity has the same name as the pattern's entity;
 * - the parsed value is one of the cached `values` of that entity
 *   (this is checked for every match type, not only for non-`'value'` patterns);
 * - when `pattern.match` is `'value'`, the parsed value also equals `pattern.value`.
 *
 * @param e - The entity parsed from the user input.
 * @param pattern - The NLP pattern the parsed entity is tested against.
 * @param entityData - The cached data of the pattern's entity; only its `values` are read here.
 * @returns `true` when every condition above holds, `false` otherwise.
 */
private matchesEntityData(
  e: NLU.ParseEntity,
  pattern: NlpPattern,
  entityData: NlpCacheMapValues,
): boolean {
  if (e.entity !== pattern.entity) return false;
  // `entityData` is a required, non-nullable parameter, so no optional
  // chaining is needed: the value must be known to the cache in all cases.
  if (!entityData.values.some((v) => v === e.value)) return false;
  // Entity-level patterns accept any known value; value-level patterns
  // additionally require the exact value.
  return pattern.match !== 'value' || e.value === pattern.value;
}
/**
 * Scores a single pattern from the entity that matched it.
 *
 * The score is `confidence * weight`, additionally multiplied by
 * `nlpPenaltyFactor` when the pattern's match type is `'entity'`.
 * A missing entity, or an entity whose confidence is falsy (including `0`),
 * scores `0`.
 *
 * @param entity - The matched parsed entity, or `undefined` when nothing matched.
 * @param pattern - The pattern being scored; only its `match` type is read.
 * @param entityData - Cached data of the entity; only its `weight` is read.
 * @param nlpPenaltyFactor - Multiplier applied to `'entity'`-type matches.
 * @returns The pattern's contribution to the block score.
 */
private computePatternScore(
  entity: NLU.ParseEntity | undefined,
  pattern: NlpPattern,
  entityData: NlpCacheMapValues,
  nlpPenaltyFactor: number,
): number {
  const confidence = entity?.confidence;
  if (!confidence) {
    return 0;
  }
  const weighted = confidence * entityData.weight;
  return pattern.match === 'entity' ? weighted * nlpPenaltyFactor : weighted;
}
/**
* Matches an outcome-based block from a list of available blocks
* based on the outcome of a system message.

View File

@ -11,11 +11,13 @@ import {
IsArray,
IsBoolean,
IsIn,
IsInt,
IsNotEmpty,
IsNumber,
IsOptional,
IsString,
Matches,
Min,
} from 'class-validator';
import { DtoConfig } from '@/utils/types/dto.types';
@ -52,9 +54,12 @@ export class NlpEntityCreateDto {
@ApiPropertyOptional({
description: 'Nlp entity associated weight for next block triggering',
type: Number,
minimum: 1,
})
@IsNumber()
@IsOptional()
@Min(1, { message: 'Weight must be a positive integer' })
@IsInt({ message: 'Weight must be an integer' })
weight?: number;
}

View File

@ -26,7 +26,10 @@ export enum NlpSampleState {
inbox = 'inbox',
}
export type NlpCacheMap = Map<
string,
{ id: string; weight: number; values: string[] }
>;
export type NlpCacheMap = Map<string, NlpCacheMapValues>;
export type NlpCacheMapValues = {
id: string;
weight: number;
values: string[];
};

View File

@ -130,7 +130,7 @@ export class NlpEntityService extends BaseService<
* Clears the NLP map cache
*/
async clearCache() {
this.cacheManager.del(NLP_MAP_CACHE_KEY);
await this.cacheManager.del(NLP_MAP_CACHE_KEY);
}
/**
@ -166,9 +166,6 @@ export class NlpEntityService extends BaseService<
async getNlpMap(entityNames: string[]): Promise<NlpCacheMap> {
const lookups = await this.findAndPopulate({ name: { $in: entityNames } });
const map: NlpCacheMap = new Map();
if (!lookups.length) {
return map; // Return empty map if no entities found
}
for (const lookup of lookups) {
map.set(lookup.name, {
id: lookup.id,