From 9f14e9ec19dd7b851a0490bb569d7d012205b926 Mon Sep 17 00:00:00 2001 From: MohamedAliBouhaouala Date: Tue, 6 May 2025 19:46:31 +0100 Subject: [PATCH] fix: apply feedback --- api/docs/nlp/README.md | 102 ------------------ api/src/chat/services/block.service.ts | 4 +- .../nlp/services/nlp-entity.service.spec.ts | 3 +- api/src/nlp/services/nlp-entity.service.ts | 5 +- 4 files changed, 6 insertions(+), 108 deletions(-) delete mode 100644 api/docs/nlp/README.md diff --git a/api/docs/nlp/README.md b/api/docs/nlp/README.md deleted file mode 100644 index cc7daa2e..00000000 --- a/api/docs/nlp/README.md +++ /dev/null @@ -1,102 +0,0 @@ -# NLP Block Scoring -## Purpose - -**NLP Block Scoring** is a mechanism used to select the most relevant response block based on: - -- Matching patterns between user input and block definitions -- Configurable weights assigned to each entity type -- Confidence values provided by the NLU engine for detected entities - -It enables more intelligent and context-aware block selection in conversational flows. - -## Core Use Cases -### Standard Matching - -A user input contains entities that directly match a block’s patterns. -```ts -Example: Input: intent = enquiry & subject = claim -Block A: Patterns: intent: enquiry & subject: claim -Block A will be selected. -``` - -### High Confidence, Partial Match - -A block may match only some patterns but have high-confidence input on those matched ones, making it a better candidate than others with full matches but low-confidence entities. -**Note: Confidence is multiplied by a pre-defined weight for each entity type.** - -```ts -Example: -Input: intent = issue (confidence: 0.92) & subject = claim (confidence: 0.65) -Block A: Pattern: intent: issue -Block B: Pattern: subject: claim -➤ Block A gets a high score based on confidence × weight (assuming both weights are equal to 1). 
-``` - -### Multiple Blocks with Similar Patterns - -```ts -Input: intent = issue & subject = insurance -Block A: intent = enquiry & subject = insurance -Block B: subject = insurance -➤ Block B is selected — Block A mismatches on intent. -``` - -### Exclusion Due to Extra Patterns - -If a block contains patterns that require entities not present in the user input, the block is excluded from scoring altogether. No penalties are applied — the block simply isn't considered a valid candidate. - -```ts -Input: intent = issue & subject = insurance -Block A: intent = enquiry & subject = insurance & location = office -Block B: subject = insurance & time = morning -➤ Neither block is selected due to unmatched required patterns (`location`, `time`) -``` - -### Tie-Breaking with Penalty Factors - -When multiple blocks receive similar scores, penalty factors can help break the tie — especially in cases where patterns are less specific (e.g., using `Any` as a value). - -```ts -Input: intent = enquiry & subject = insurance - -Block A: intent = enquiry & subject = Any -Block B: intent = enquiry & subject = insurance -Block C: subject = insurance - -Scoring Summary: -- Block A matches both patterns, but subject = Any is considered less specific. -- Block B has a redundant but fully specific match. -- Block C matches only one pattern. - -➤ Block A and Block B have similar raw scores. -➤ A penalty factor is applied to Block A due to its use of Any, reducing its final score. -➤ Block B is selected. -``` - -## How Scoring Works -### Matching and Confidence - -For each entity in the block's pattern: -- If the entity `matches` an entity in the user input: - - the score is increased by: `confidence × weight` - - `Confidence` is a value between 0 and 1, returned by the NLU engine. - - `Weight` (default value is `1`) is a configured importance factor for that specific entity type. 
-- If the match is a wildcard (i.e., the block accepts any value): - - A **penalty factor** is applied to slightly reduce its contribution: - ``confidence × weight × penaltyFactor``. This encourages more specific matches when available. - -### Scoring Formula Summary - -For each matched entity: - -```ts -score += confidence × weight × [optional penalty factor if wildcard] -``` - -The total block score is the sum of all matched patterns in that block. - -### Penalty Factor - -The **penalty factor** is a global multiplier (typically less than `1`, e.g., `0.8`) applied when the match type is less specific — such as wildcard or loose entity type matches. It allows the system to: -- Break ties in favor of more precise blocks -- Discourage overly generic blocks from being selected when better matches are available diff --git a/api/src/chat/services/block.service.ts b/api/src/chat/services/block.service.ts index 076c6a07..75373f1f 100644 --- a/api/src/chat/services/block.service.ts +++ b/api/src/chat/services/block.service.ts @@ -305,7 +305,7 @@ export class BlockService extends BaseService< return undefined; } - const nlpPatterns = block.patterns?.filter((p) => { + const nlpPatterns = block.patterns.filter((p) => { return Array.isArray(p); }) as NlpPattern[][]; // No nlp patterns found @@ -313,7 +313,7 @@ export class BlockService extends BaseService< return undefined; } - // Find NLP pattern match based on best guessed entities + // Keep only the NLP patterns that match the best guessed entities return nlpPatterns.filter((entities: NlpPattern[]) => { return entities.every((ev: NlpPattern) => { if (ev.match === 'value') { diff --git a/api/src/nlp/services/nlp-entity.service.spec.ts b/api/src/nlp/services/nlp-entity.service.spec.ts index d90e6ff9..5cc877ff 100644 --- a/api/src/nlp/services/nlp-entity.service.spec.ts +++ b/api/src/nlp/services/nlp-entity.service.spec.ts @@ -9,6 +9,7 @@ import { CACHE_MANAGER } from '@nestjs/cache-manager'; import { MongooseModule } from 
'@nestjs/mongoose'; +import { NOT_FOUND_ID } from '@/utils/constants/mock'; import { nlpEntityFixtures } from '@/utils/test/fixtures/nlpentity'; import { installNlpValueFixtures } from '@/utils/test/fixtures/nlpvalue'; import { getPageQuery } from '@/utils/test/pagination'; @@ -152,7 +153,7 @@ describe('nlpEntityService', () => { }); it('should handle updating weight of non-existent entity', async () => { - const nonExistentId = '507f1f77bcf86cd799439011'; // Example MongoDB ObjectId + const nonExistentId = NOT_FOUND_ID; try { await nlpEntityService.updateWeight(nonExistentId, 5); diff --git a/api/src/nlp/services/nlp-entity.service.ts b/api/src/nlp/services/nlp-entity.service.ts index c92b7888..2704dabd 100644 --- a/api/src/nlp/services/nlp-entity.service.ts +++ b/api/src/nlp/services/nlp-entity.service.ts @@ -158,12 +158,11 @@ export class NlpEntityService extends BaseService< /** * Retrieves NLP entity lookup information for the given list of entity names. * - * This method queries the database for lookups that match any of the provided - * entity names, transforms the result into a map structure where each key is + * This method queries the database for NLP entities, + * transforms the result into a map structure where each key is * the entity name and each value contains metadata (id, weight, and list of values), * and caches the result using the configured cache key. * - * @param entityNames - Array of entity names to retrieve lookup data for. * @returns A Promise that resolves to a map of entity name to its corresponding lookup metadata. */ @Cacheable(NLP_MAP_CACHE_KEY)