mirror of
https://github.com/hexastack/hexabot
synced 2025-06-26 18:27:28 +00:00
fix: decompose code into helper utils, add nlp entity dto validation, remove type casting
This commit is contained in:
parent
a8666ceb57
commit
3ed21b40a7
@ -13,7 +13,7 @@ It enables more intelligent and context-aware block selection in conversational
|
||||
### Standard Matching
|
||||
|
||||
A user input contains entities that directly match a block’s patterns.
|
||||
```bash
|
||||
```ts
|
||||
Example: Input: intent = enquiry & subject = claim
|
||||
Block A: Patterns: intent: enquiry & subject: claim
|
||||
Block A will be selected.
|
||||
@ -24,7 +24,7 @@ Block A will be selected.
|
||||
A block may match only some patterns but have high-confidence input on those matched ones, making it a better candidate than others with full matches but low-confidence entities.
|
||||
**Note: Confidence is multiplied by a pre-defined weight for each entity type.**
|
||||
|
||||
```bash
|
||||
```ts
|
||||
Example:
|
||||
Input: intent = issue (confidence: 0.92) & subject = claim (confidence: 0.65)
|
||||
Block A: Pattern: intent: issue
|
||||
@ -34,7 +34,7 @@ Block B: Pattern: subject: claim
|
||||
|
||||
### Multiple Blocks with Similar Patterns
|
||||
|
||||
```bash
|
||||
```ts
|
||||
Input: intent = issue & subject = insurance
|
||||
Block A: intent = enquiry & subject = insurance
|
||||
Block B: subject = insurance
|
||||
@ -45,7 +45,7 @@ Block B: subject = insurance
|
||||
|
||||
If a block contains patterns that require entities not present in the user input, the block is excluded from scoring altogether. No penalties are applied — the block simply isn't considered a valid candidate.
|
||||
|
||||
```bash
|
||||
```ts
|
||||
Input: intent = issue & subject = insurance
|
||||
Block A: intent = enquiry & subject = insurance & location = office
|
||||
Block B: subject = insurance & time = morning
|
||||
@ -56,7 +56,7 @@ Block B: subject = insurance & time = morning
|
||||
|
||||
When multiple blocks receive similar scores, penalty factors can help break the tie — especially in cases where patterns are less specific (e.g., using `Any` as a value).
|
||||
|
||||
```bash
|
||||
```ts
|
||||
Input: intent = enquiry & subject = insurance
|
||||
|
||||
Block A: intent = enquiry & subject = Any
|
||||
@ -80,7 +80,7 @@ For each entity in the block's pattern:
|
||||
- If the entity `matches` an entity in the user input:
|
||||
- the score is increased by: `confidence × weight`
|
||||
- `Confidence` is a value between 0 and 1, returned by the NLU engine.
|
||||
- `Weight` is a configured importance factor for that specific entity type.
|
||||
- `Weight` (default value is `1`) is a configured importance factor for that specific entity type.
|
||||
- If the match is a wildcard (i.e., the block accepts any value):
|
||||
- A **penalty factor** is applied to slightly reduce its contribution:
|
||||
``confidence × weight × penaltyFactor``. This encourages more specific matches when available.
|
||||
@ -89,7 +89,7 @@ For each entity in the block's pattern:
|
||||
|
||||
For each matched entity:
|
||||
|
||||
```bash
|
||||
```ts
|
||||
score += confidence × weight × [optional penalty factor if wildcard]
|
||||
```
|
||||
|
||||
|
@ -74,7 +74,7 @@ import {
|
||||
import { buildTestingMocks } from '@/utils/test/utils';
|
||||
|
||||
import { BlockRepository } from '../repositories/block.repository';
|
||||
import { Block, BlockModel } from '../schemas/block.schema';
|
||||
import { Block, BlockFull, BlockModel } from '../schemas/block.schema';
|
||||
import { Category, CategoryModel } from '../schemas/category.schema';
|
||||
import { LabelModel } from '../schemas/label.schema';
|
||||
import { FileType } from '../schemas/types/attachment';
|
||||
@ -415,7 +415,7 @@ describe('BlockService', () => {
|
||||
jest
|
||||
.spyOn(nlpEntityService, 'getNlpMap')
|
||||
.mockResolvedValue(mockNlpCacheMap);
|
||||
const blocks: Block[] = []; // Empty block array
|
||||
const blocks: BlockFull[] = []; // Empty block array
|
||||
const matchedPatterns: NlpPattern[][] = [];
|
||||
const nlp = mockNlpEntitiesSetOne;
|
||||
|
||||
|
@ -16,7 +16,7 @@ import { CONSOLE_CHANNEL_NAME } from '@/extensions/channels/console/settings';
|
||||
import { NLU } from '@/helper/types';
|
||||
import { I18nService } from '@/i18n/services/i18n.service';
|
||||
import { LanguageService } from '@/i18n/services/language.service';
|
||||
import { NlpCacheMap } from '@/nlp/schemas/types';
|
||||
import { NlpCacheMap, NlpCacheMapValues } from '@/nlp/schemas/types';
|
||||
import { NlpEntityService } from '@/nlp/services/nlp-entity.service';
|
||||
import { PluginService } from '@/plugins/plugins.service';
|
||||
import { PluginType } from '@/plugins/types';
|
||||
@ -219,12 +219,12 @@ export class BlockService extends BaseService<
|
||||
|
||||
// Proceed with matching the best NLP block
|
||||
if (matchesWithPatterns.length > 0) {
|
||||
block = (await this.matchBestNLP(
|
||||
block = await this.matchBestNLP(
|
||||
matchesWithPatterns.map((m) => m.block),
|
||||
matchesWithPatterns.map((p) => p.matchedPattern),
|
||||
nlp,
|
||||
nluPenaltyFactor,
|
||||
)) as BlockFull | undefined;
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -384,27 +384,18 @@ export class BlockService extends BaseService<
|
||||
* @returns The block with the highest NLP score, or undefined if no valid block is found.
|
||||
*/
|
||||
async matchBestNLP(
|
||||
blocks: (Block | BlockFull)[] | undefined,
|
||||
blocks: BlockFull[],
|
||||
matchedPatterns: NlpPattern[][],
|
||||
nlp: NLU.ParseEntities,
|
||||
nlpPenaltyFactor: number,
|
||||
): Promise<Block | BlockFull | undefined> {
|
||||
): Promise<BlockFull | undefined> {
|
||||
if (!blocks || blocks.length === 0) return undefined;
|
||||
if (blocks.length === 1) return blocks[0];
|
||||
|
||||
let bestBlock: Block | BlockFull | undefined;
|
||||
let bestBlock: BlockFull | undefined;
|
||||
let highestScore = 0;
|
||||
const entityNames: string[] = blocks.flatMap((block) =>
|
||||
block.patterns.flatMap((patternGroup) => {
|
||||
if (Array.isArray(patternGroup)) {
|
||||
return patternGroup.flatMap((pattern) =>
|
||||
isNlpPattern(pattern) ? [pattern.entity] : [],
|
||||
);
|
||||
}
|
||||
return []; // Skip non-array patternGroups
|
||||
}),
|
||||
);
|
||||
const uniqueEntityNames: string[] = [...new Set(entityNames)];
|
||||
const entityNames = this.extractNlpEntityNames(blocks);
|
||||
const uniqueEntityNames = [...new Set(entityNames)];
|
||||
const nlpCacheMap: NlpCacheMap =
|
||||
await this.entityService.getNlpMap(uniqueEntityNames);
|
||||
// Iterate through all blocks and calculate their NLP score
|
||||
@ -412,7 +403,7 @@ export class BlockService extends BaseService<
|
||||
const block = blocks[i];
|
||||
const patterns = matchedPatterns[i];
|
||||
// If compatible, calculate the NLP score for this block
|
||||
const nlpScore: number = this.calculateBlockScore(
|
||||
const nlpScore = this.calculateBlockScore(
|
||||
patterns,
|
||||
nlp,
|
||||
nlpCacheMap,
|
||||
@ -424,8 +415,13 @@ export class BlockService extends BaseService<
|
||||
}
|
||||
}
|
||||
|
||||
this.logger.debug(`Best NLP score obtained: ${highestScore}`);
|
||||
this.logger.debug(`Best block selected: ${JSON.stringify(bestBlock)}`);
|
||||
if (bestBlock) {
|
||||
this.logger.debug(`Best NLP score obtained: ${highestScore}`);
|
||||
this.logger.debug(`Best block selected:`, {
|
||||
id: bestBlock.id,
|
||||
name: bestBlock.name,
|
||||
});
|
||||
}
|
||||
|
||||
return bestBlock;
|
||||
}
|
||||
@ -452,28 +448,96 @@ export class BlockService extends BaseService<
|
||||
nlpPenaltyFactor: number,
|
||||
): number {
|
||||
// Compute individual pattern scores using the cache
|
||||
if (!patterns.length) return 0;
|
||||
const patternScores: number[] = patterns.map((pattern) => {
|
||||
const entityData = nlpCacheMap.get(pattern.entity);
|
||||
if (!entityData) return 0;
|
||||
|
||||
const matchedEntity: NLU.ParseEntity | undefined = nlp.entities.find(
|
||||
(e) =>
|
||||
e.entity === pattern.entity &&
|
||||
entityData?.values.some((v) => v === e.value) &&
|
||||
(pattern.match !== 'value' || e.value === pattern.value),
|
||||
(e) => this.matchesEntityData(e, pattern, entityData),
|
||||
);
|
||||
|
||||
return matchedEntity?.confidence
|
||||
? matchedEntity.confidence *
|
||||
entityData.weight *
|
||||
(pattern.match === 'entity' ? nlpPenaltyFactor : 1)
|
||||
: 0;
|
||||
return this.computePatternScore(
|
||||
matchedEntity,
|
||||
pattern,
|
||||
entityData,
|
||||
nlpPenaltyFactor,
|
||||
);
|
||||
});
|
||||
|
||||
// Sum the scores
|
||||
return patternScores.reduce((sum, score) => sum + score, 0);
|
||||
}
|
||||
|
||||
/**
 * Collects the entity name of every NLP pattern declared on the given blocks.
 *
 * Each block's `patterns` list is scanned; only entries that are arrays
 * (groups of NLP patterns) are inspected, and within those groups only items
 * accepted by `isNlpPattern` contribute their `entity` field.
 * Names are returned in encounter order and may contain duplicates.
 *
 * @param blocks - The `BlockFull` objects whose patterns should be scanned.
 * @returns The entity names found across all NLP patterns of all blocks.
 */
private extractNlpEntityNames(blocks: BlockFull[]): string[] {
  const names: string[] = [];

  for (const block of blocks) {
    for (const patternGroup of block.patterns) {
      // Non-array entries are not NLP pattern groups: nothing to collect
      if (!Array.isArray(patternGroup)) continue;

      for (const pattern of patternGroup) {
        if (isNlpPattern(pattern)) {
          names.push(pattern.entity);
        }
      }
    }
  }

  return names;
}
|
||||
|
||||
/**
 * Tells whether an entity parsed from the user input satisfies a block's NLP pattern,
 * given the cached data of the entity the pattern refers to.
 *
 * All of the following conditions must hold:
 * - The parsed entity and the pattern refer to the same entity name.
 * - The parsed entity's value is one of the values listed in the cache for that entity
 *   (this check applies whatever the pattern's match type is).
 * - When the pattern's match type is `'value'`, the parsed entity's value is also
 *   equal to the value specified in the pattern.
 *
 * @param e - The `ParseEntity` object from the NLP model, containing the entity name and its value.
 * @param pattern - The `NlpPattern` object representing the entity (and optionally value) to be matched.
 * @param entityData - The `NlpCacheMapValues` object holding the cached values and weight of the pattern's entity.
 *
 * @returns `true` if the parsed entity matches both the pattern and the cached entity data, otherwise `false`.
 */
private matchesEntityData(
  e: NLU.ParseEntity,
  pattern: NlpPattern,
  entityData: NlpCacheMapValues,
): boolean {
  return (
    e.entity === pattern.entity &&
    // `entityData` is a required parameter, so no optional chaining is needed here
    entityData.values.includes(e.value) &&
    (pattern.match !== 'value' || e.value === pattern.value)
  );
}
|
||||
|
||||
/**
 * Computes the contribution of a single pattern to a block's NLP score.
 *
 * The score is `confidence × weight`, further multiplied by the penalty factor
 * when the pattern matches on the entity alone (match type `'entity'`).
 * A missing entity, or an entity without a truthy confidence, scores `0`.
 *
 * @param entity - The parsed entity matched for the pattern, or `undefined` if none matched.
 * @param pattern - The `NlpPattern` whose match type decides whether the penalty applies.
 * @param entityData - The cached data of the pattern's entity; its `weight` scales the score.
 * @param nlpPenaltyFactor - The factor applied when the pattern's match type is `'entity'`.
 * @returns The pattern score, or `0` when there is no usable match.
 */
private computePatternScore(
  entity: NLU.ParseEntity | undefined,
  pattern: NlpPattern,
  entityData: NlpCacheMapValues,
  nlpPenaltyFactor: number,
): number {
  const confidence = entity?.confidence;
  if (!confidence) {
    return 0;
  }

  let factor = 1;
  if (pattern.match === 'entity') {
    // Entity-only match: apply the penalty factor
    factor = nlpPenaltyFactor;
  }

  return confidence * entityData.weight * factor;
}
|
||||
|
||||
/**
|
||||
* Matches an outcome-based block from a list of available blocks
|
||||
* based on the outcome of a system message.
|
||||
|
@ -11,11 +11,13 @@ import {
|
||||
IsArray,
|
||||
IsBoolean,
|
||||
IsIn,
|
||||
IsInt,
|
||||
IsNotEmpty,
|
||||
IsNumber,
|
||||
IsOptional,
|
||||
IsString,
|
||||
Matches,
|
||||
Min,
|
||||
} from 'class-validator';
|
||||
|
||||
import { DtoConfig } from '@/utils/types/dto.types';
|
||||
@ -52,9 +54,12 @@ export class NlpEntityCreateDto {
|
||||
@ApiPropertyOptional({
|
||||
description: 'Nlp entity associated weight for next block triggering',
|
||||
type: Number,
|
||||
minimum: 1,
|
||||
})
|
||||
@IsNumber()
|
||||
@IsOptional()
|
||||
@Min(1, { message: 'Weight must be a positive integer' })
|
||||
@IsInt({ message: 'Weight must be an integer' })
|
||||
weight?: number;
|
||||
}
|
||||
|
||||
|
@ -26,7 +26,10 @@ export enum NlpSampleState {
|
||||
inbox = 'inbox',
|
||||
}
|
||||
|
||||
export type NlpCacheMap = Map<
|
||||
string,
|
||||
{ id: string; weight: number; values: string[] }
|
||||
>;
|
||||
export type NlpCacheMap = Map<string, NlpCacheMapValues>;
|
||||
|
||||
export type NlpCacheMapValues = {
|
||||
id: string;
|
||||
weight: number;
|
||||
values: string[];
|
||||
};
|
||||
|
@ -130,7 +130,7 @@ export class NlpEntityService extends BaseService<
|
||||
* Clears the NLP map cache
|
||||
*/
|
||||
async clearCache() {
|
||||
this.cacheManager.del(NLP_MAP_CACHE_KEY);
|
||||
await this.cacheManager.del(NLP_MAP_CACHE_KEY);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -166,9 +166,6 @@ export class NlpEntityService extends BaseService<
|
||||
async getNlpMap(entityNames: string[]): Promise<NlpCacheMap> {
|
||||
const lookups = await this.findAndPopulate({ name: { $in: entityNames } });
|
||||
const map: NlpCacheMap = new Map();
|
||||
if (!lookups.length) {
|
||||
return map; // Return empty map if no entities found
|
||||
}
|
||||
for (const lookup of lookups) {
|
||||
map.set(lookup.name, {
|
||||
id: lookup.id,
|
||||
|
Loading…
Reference in New Issue
Block a user