fix: decompose code into helper utils, add nlp entity dto validation, remove type casting

2025-06-26 18:27:28 +00:00 · 2025-05-05 18:35:12 +01:00 · 2025-05-05 18:35:12 +01:00 · 3ed21b40a7
commit 3ed21b40a7
parent a8666ceb57
6 changed files with 115 additions and 46 deletions
--- a/api/docs/nlp/README.md
+++ b/api/docs/nlp/README.md
@ -13,7 +13,7 @@ It enables more intelligent and context-aware block selection in conversational
 ### Standard Matching

 A user input contains entities that directly match a block’s patterns.
-```bash
+```ts
 Example: Input: intent = enquiry & subject = claim
 Block A: Patterns: intent: enquiry & subject: claim
 Block A will be selected.
@ -24,7 +24,7 @@ Block A will be selected.
 A block may match only some patterns but have high-confidence input on those matched ones, making it a better candidate than others with full matches but low-confidence entities.
 **Note: Confidence is multiplied by a pre-defined weight for each entity type.**

-```bash
+```ts
 Example:
 Input: intent = issue (confidence: 0.92) & subject = claim (confidence: 0.65)
 Block A: Pattern: intent: issue
@ -34,7 +34,7 @@ Block B: Pattern: subject: claim

 ### Multiple Blocks with Similar Patterns

-```bash
+```ts
 Input: intent = issue & subject = insurance
 Block A: intent = enquiry & subject = insurance
 Block B: subject = insurance
@ -45,7 +45,7 @@ Block B: subject = insurance

 If a block contains patterns that require entities not present in the user input, the block is excluded from scoring altogether. No penalties are applied — the block simply isn't considered a valid candidate.

-```bash
+```ts
 Input: intent = issue & subject = insurance
 Block A: intent = enquiry & subject = insurance & location = office
 Block B: subject = insurance & time = morning
@ -56,7 +56,7 @@ Block B: subject = insurance & time = morning

 When multiple blocks receive similar scores, penalty factors can help break the tie — especially in cases where patterns are less specific (e.g., using `Any` as a value).

-```bash
+```ts
 Input: intent = enquiry & subject = insurance

 Block A: intent = enquiry & subject = Any
@ -80,7 +80,7 @@ For each entity in the block's pattern:
 - If the entity `matches` an entity in the user input:
    - the score is increased by: `confidence × weight`
        - `Confidence` is a value between 0 and 1, returned by the NLU engine.
-        - `Weight` is a configured importance factor for that specific entity type.
+        - `Weight` (default value is `1`) is a configured importance factor for that specific entity type.
 - If the match is a wildcard (i.e., the block accepts any value):
    - A **penalty factor** is applied to slightly reduce its contribution:
    ``confidence × weight × penaltyFactor``. This encourages more specific matches when available.
@ -89,7 +89,7 @@ For each entity in the block's pattern:

 For each matched entity: 

-```bash
+```ts
 score += confidence × weight × [optional penalty factor if wildcard]
 ```

--- a/api/src/chat/services/block.service.spec.ts
+++ b/api/src/chat/services/block.service.spec.ts
@ -74,7 +74,7 @@ import {
 import { buildTestingMocks } from '@/utils/test/utils';

 import { BlockRepository } from '../repositories/block.repository';
-import { Block, BlockModel } from '../schemas/block.schema';
+import { Block, BlockFull, BlockModel } from '../schemas/block.schema';
 import { Category, CategoryModel } from '../schemas/category.schema';
 import { LabelModel } from '../schemas/label.schema';
 import { FileType } from '../schemas/types/attachment';
@ -415,7 +415,7 @@ describe('BlockService', () => {
      jest
        .spyOn(nlpEntityService, 'getNlpMap')
        .mockResolvedValue(mockNlpCacheMap);
-      const blocks: Block[] = []; // Empty block array
+      const blocks: BlockFull[] = []; // Empty block array
      const matchedPatterns: NlpPattern[][] = [];
      const nlp = mockNlpEntitiesSetOne;

--- a/api/src/chat/services/block.service.ts
+++ b/api/src/chat/services/block.service.ts
@ -16,7 +16,7 @@ import { CONSOLE_CHANNEL_NAME } from '@/extensions/channels/console/settings';
 import { NLU } from '@/helper/types';
 import { I18nService } from '@/i18n/services/i18n.service';
 import { LanguageService } from '@/i18n/services/language.service';
-import { NlpCacheMap } from '@/nlp/schemas/types';
+import { NlpCacheMap, NlpCacheMapValues } from '@/nlp/schemas/types';
 import { NlpEntityService } from '@/nlp/services/nlp-entity.service';
 import { PluginService } from '@/plugins/plugins.service';
 import { PluginType } from '@/plugins/types';
@ -219,12 +219,12 @@ export class BlockService extends BaseService<

        // Proceed with matching the best NLP block
        if (matchesWithPatterns.length > 0) {
-          block = (await this.matchBestNLP(
+          block = await this.matchBestNLP(
            matchesWithPatterns.map((m) => m.block),
            matchesWithPatterns.map((p) => p.matchedPattern),
            nlp,
            nluPenaltyFactor,
-          )) as BlockFull | undefined;
+          );
        }
      }
    }
@ -384,27 +384,18 @@ export class BlockService extends BaseService<
   * @returns The block with the highest NLP score, or undefined if no valid block is found.
   */
  async matchBestNLP(
-    blocks: (Block | BlockFull)[] | undefined,
+    blocks: BlockFull[],
    matchedPatterns: NlpPattern[][],
    nlp: NLU.ParseEntities,
    nlpPenaltyFactor: number,
-  ): Promise<Block | BlockFull | undefined> {
+  ): Promise<BlockFull | undefined> {
    if (!blocks || blocks.length === 0) return undefined;
    if (blocks.length === 1) return blocks[0];

-    let bestBlock: Block | BlockFull | undefined;
+    let bestBlock: BlockFull | undefined;
    let highestScore = 0;
-    const entityNames: string[] = blocks.flatMap((block) =>
-      block.patterns.flatMap((patternGroup) => {
-        if (Array.isArray(patternGroup)) {
-          return patternGroup.flatMap((pattern) =>
-            isNlpPattern(pattern) ? [pattern.entity] : [],
-          );
-        }
-        return []; // Skip non-array patternGroups
-      }),
-    );
-    const uniqueEntityNames: string[] = [...new Set(entityNames)];
+    const entityNames = this.extractNlpEntityNames(blocks);
+    const uniqueEntityNames = [...new Set(entityNames)];
    const nlpCacheMap: NlpCacheMap =
      await this.entityService.getNlpMap(uniqueEntityNames);
    // Iterate through all blocks and calculate their NLP score
@ -412,7 +403,7 @@ export class BlockService extends BaseService<
      const block = blocks[i];
      const patterns = matchedPatterns[i];
      // If compatible, calculate the NLP score for this block
-      const nlpScore: number = this.calculateBlockScore(
+      const nlpScore = this.calculateBlockScore(
        patterns,
        nlp,
        nlpCacheMap,
@ -424,8 +415,13 @@ export class BlockService extends BaseService<
      }
    }

-    this.logger.debug(`Best NLP score obtained: ${highestScore}`);
-    this.logger.debug(`Best block selected: ${JSON.stringify(bestBlock)}`);
+    if (bestBlock) {
+      this.logger.debug(`Best NLP score obtained: ${highestScore}`);
+      this.logger.debug(`Best block selected:`, {
+        id: bestBlock.id,
+        name: bestBlock.name,
+      });
+    }

    return bestBlock;
  }
@ -452,28 +448,96 @@ export class BlockService extends BaseService<
    nlpPenaltyFactor: number,
  ): number {
    // Compute individual pattern scores using the cache
+    if (!patterns.length) return 0;
    const patternScores: number[] = patterns.map((pattern) => {
      const entityData = nlpCacheMap.get(pattern.entity);
      if (!entityData) return 0;

      const matchedEntity: NLU.ParseEntity | undefined = nlp.entities.find(
-        (e) =>
-          e.entity === pattern.entity &&
-          entityData?.values.some((v) => v === e.value) &&
-          (pattern.match !== 'value' || e.value === pattern.value),
+        (e) => this.matchesEntityData(e, pattern, entityData),
      );

-      return matchedEntity?.confidence
-        ? matchedEntity.confidence *
-            entityData.weight *
-            (pattern.match === 'entity' ? nlpPenaltyFactor : 1)
-        : 0;
+      return this.computePatternScore(
+        matchedEntity,
+        pattern,
+        entityData,
+        nlpPenaltyFactor,
+      );
    });

    // Sum the scores
    return patternScores.reduce((sum, score) => sum + score, 0);
  }

+  /**
+   * Collects the `entity` name of every NLP pattern found in the given blocks.
+   * It walks each block's `patterns` with nested `flatMap` calls (no recursion):
+   * only array pattern groups are inspected, and only items passing `isNlpPattern` contribute.
+   * Names are returned in encounter order; duplicates are not removed here.
+   *
+   * @param blocks - An array of `BlockFull` objects containing patterns.
+   * @returns An array of NLP entity names as strings (may contain duplicates).
+   */
+  private extractNlpEntityNames(blocks: BlockFull[]): string[] {
+    return blocks.flatMap((block) =>
+      block.patterns.flatMap((patternGroup) => {
+        if (Array.isArray(patternGroup)) {
+          return patternGroup.flatMap((pattern) =>
+            isNlpPattern(pattern) ? [pattern.entity] : [],
+          );
+        }
+        return []; // Skip non-array patternGroups
+      }),
+    );
+  }
+
+  /**
+   * Checks if a given `ParseEntity` from the NLP model matches the specified pattern
+   * and if its value exists within the values provided in the cache for the specified entity.
+   *
+   * @param e - The `ParseEntity` object from the NLP model, containing information about the entity and its value.
+   * @param pattern - The `NlpPattern` object representing the entity and value pattern to be matched.
+   * @param entityData - The `NlpCacheMapValues` object containing cached data, including entity values and weight, for the entity being matched.
+   *
+   * @returns A boolean indicating whether the `ParseEntity` matches the pattern and entity data from the cache.
+   *
+   * - The entity name of the `ParseEntity` must equal the entity name of the `NlpPattern`.
+   * - Regardless of the pattern's match type, the entity's value must be present in the cache's `values` array.
+   * - If the pattern's match type is `'value'`, the entity's value must additionally equal the pattern's `value`.
+   * - Returns `true` if all conditions are met, otherwise `false`.
+   */
+  private matchesEntityData(
+    e: NLU.ParseEntity,
+    pattern: NlpPattern,
+    entityData: NlpCacheMapValues,
+  ): boolean {
+    return (
+      e.entity === pattern.entity &&
+      entityData?.values.some((v) => v === e.value) &&
+      (pattern.match !== 'value' || e.value === pattern.value)
+    );
+  }
+
+  /**
+   * Computes the score contributed by a single pattern: `confidence * weight * penalty`.
+   *
+   * @param entity - The matched `ParseEntity`, or `undefined` if no match was found.
+   * @param pattern - The `NlpPattern` whose `match` type decides whether the penalty factor applies.
+   * @param entityData - The cached data for the given entity; only its `weight` is read here.
+   * @param nlpPenaltyFactor - The penalty factor applied when the pattern's match type is 'entity'.
+   * @returns `0` when `entity` is `undefined` or its confidence is falsy; otherwise the product above (penalty is `1` unless the match type is 'entity').
+   */
+  private computePatternScore(
+    entity: NLU.ParseEntity | undefined,
+    pattern: NlpPattern,
+    entityData: NlpCacheMapValues,
+    nlpPenaltyFactor: number,
+  ): number {
+    if (!entity || !entity.confidence) return 0;
+    const penalty = pattern.match === 'entity' ? nlpPenaltyFactor : 1;
+    return entity.confidence * entityData.weight * penalty;
+  }
+
  /**
   * Matches an outcome-based block from a list of available blocks
   * based on the outcome of a system message.
--- a/api/src/nlp/dto/nlp-entity.dto.ts
+++ b/api/src/nlp/dto/nlp-entity.dto.ts
@ -11,11 +11,13 @@ import {
  IsArray,
  IsBoolean,
  IsIn,
+  IsInt,
  IsNotEmpty,
  IsNumber,
  IsOptional,
  IsString,
  Matches,
+  Min,
 } from 'class-validator';

 import { DtoConfig } from '@/utils/types/dto.types';
@ -52,9 +54,12 @@ export class NlpEntityCreateDto {
  @ApiPropertyOptional({
    description: 'Nlp entity associated weight for next block triggering',
    type: Number,
+    minimum: 1,
  })
  @IsNumber()
  @IsOptional()
+  @Min(1, { message: 'Weight must be a positive integer' })
+  @IsInt({ message: 'Weight must be an integer' })
  weight?: number;
 }

--- a/api/src/nlp/schemas/types.ts
+++ b/api/src/nlp/schemas/types.ts
@ -26,7 +26,10 @@ export enum NlpSampleState {
  inbox = 'inbox',
 }

-export type NlpCacheMap = Map<
-  string,
-  { id: string; weight: number; values: string[] }
->;
+export type NlpCacheMap = Map<string, NlpCacheMapValues>;
+
+export type NlpCacheMapValues = {
+  id: string;
+  weight: number;
+  values: string[];
+};
--- a/api/src/nlp/services/nlp-entity.service.ts
+++ b/api/src/nlp/services/nlp-entity.service.ts
@ -130,7 +130,7 @@ export class NlpEntityService extends BaseService<
   * Clears the NLP map cache
   */
  async clearCache() {
-    this.cacheManager.del(NLP_MAP_CACHE_KEY);
+    await this.cacheManager.del(NLP_MAP_CACHE_KEY);
  }

  /**
@ -166,9 +166,6 @@ export class NlpEntityService extends BaseService<
  async getNlpMap(entityNames: string[]): Promise<NlpCacheMap> {
    const lookups = await this.findAndPopulate({ name: { $in: entityNames } });
    const map: NlpCacheMap = new Map();
-    if (!lookups.length) {
-      return map; // Return empty map if no entities found
-    }
    for (const lookup of lookups) {
      map.set(lookup.name, {
        id: lookup.id,